Initial commit of slim_korapxml2krill_log

This is a script to take a log generated by korapxml2krill
and remove all irrelevant information - just leaving information
that helps to find problems.
It takes the log file as its only argument and writes a new log file
with the -slim suffix to the same directory.

Change-Id: I5c8c35cbc9be73c9cde33195b470200d5ef60333
diff --git a/bin/slim_korapxml2krill_log b/bin/slim_korapxml2krill_log
new file mode 100644
index 0000000..f0146cf
--- /dev/null
+++ b/bin/slim_korapxml2krill_log
@@ -0,0 +1,87 @@
+#!/usr/bin/env perl
+use Mojo::Base -strict;
+use Mojo::File qw'path';
+
+# Slim down a log written by korapxml2krill: routine progress lines and
+# follow-up noise are dropped, and the individual failure messages are
+# replaced by their counts on the "## Done." line that follows them.
+
+# Fail with a usage hint instead of handing an undefined value to path()
+die "Usage: $0 <logfile>\n" unless defined($ARGV[0]) && length($ARGV[0]);
+
+my $file = path($ARGV[0]);
+
+# Open the input first, so a missing log does not leave an empty slim log
+my $fh = $file->open('<');
+
+# The output is a sibling of the input: "my.log" becomes "my-slim.log"
+my $out_fh = path($file->dirname)->child(
+  $file->basename('.log') . '-slim.log'
+)->open('>');
+
+# Failure counters, reset whenever a "Done." line has been summarized
+my %failed = (process => 0, offsets => 0, substring => 0);
+
+# Iterate over file
+LINE:
+while (defined(my $line = <$fh>)) {
+
+  # Of the " Processed" lines only those containing ":1/" are kept
+  next LINE if $line =~ m! Processed! && $line !~ m!:1/!;
+
+  # Known failure messages are counted instead of copied
+  if ($line =~ m! Unable to process !) {
+    $failed{process}++;
+    next LINE;
+  }
+  elsif ($line =~ m! Tokenization with failing offsets !) {
+    $failed{offsets}++;
+    next LINE;
+  }
+  elsif ($line =~ m! Unable to find substring !) {
+    $failed{substring}++;
+    next LINE;
+  }
+  elsif ($line =~ m!^Done\.$!) {
+
+    # Report the non-zero counters and start counting afresh
+    my $str = 'Done.';
+    $str .= ' [!Process: ' . $failed{process} . ']' if $failed{process};
+    $str .= ' [!Offsets: ' . $failed{offsets} . ']' if $failed{offsets};
+    $str .= ' [!Substring: ' . $failed{substring} . ']' if $failed{substring};
+    $failed{$_} = 0 for keys %failed;
+    print $out_fh "## $str\n";
+    next LINE;
+  }
+
+  # Further noise that is dropped without being counted
+  next LINE if $line =~ m!substr outside of string!;
+  next LINE if $line =~ m!with failing offsets!;
+  next LINE if $line =~ m! in /opt/korap!;
+
+  print $out_fh $line;
+}
+
+# Write errors may only surface on close, so do not ignore its result
+$out_fh->close or die "Unable to close slim log: $!\n";
+$fh->close;
+
+__END__
+
+=pod
+
+=head1 NAME
+
+slim_korapxml2krill_log - Strip a korapxml2krill log down to its problems
+
+=head1 SYNOPSIS
+
+  $ slim_korapxml2krill_log mylog.log
+
+=head1 DESCRIPTION
+
+Takes a log file generated by korapxml2krill and writes a copy without
+the irrelevant information to the same directory, using the suffix
+C<-slim.log> (C<mylog.log> becomes C<mylog-slim.log>).
+
+=cut