Make the script configurable
Change-Id: I264bd5b88ebec2650b9ae351c4a70cf7c281c7f2
diff --git a/bin/corpuslist_to_index b/bin/corpuslist_to_index
old mode 100755
new mode 100644
index e174343..3287ad0
--- a/bin/corpuslist_to_index
+++ b/bin/corpuslist_to_index
@@ -1,9 +1,35 @@
#!/usr/bin/env perl
use strict;
use warnings;
-use feature 'say';
-my $package_size = 15;
+# This script will generate commands to convert and index
+# a list of corpora with korapxml2krill and Krill
+
+use Getopt::Long qw(GetOptions :config no_auto_abbrev);
+use Pod::Usage;
+
+our $VERSION = '0.0.2'; # version of this script
+our $VERSION_MSG = "\ncorpuslist_to_index - v$VERSION\n"; # passed as -msg to pod2usage by the --help handler
+
+# A leading non-option argument is the command (defined/length test, so "0" counts too).
+my $cmd; # stays undef when the first argument is missing, empty or an option
+if (defined $ARGV[0] && length($ARGV[0]) && index($ARGV[0], '-') != 0) {
+  $cmd = shift @ARGV; # take it off @ARGV so that GetOptions only sees the options
+}
+
+GetOptions(
+  'batch|b=i' => \(my $package_size = 15), # integer batch size, defaults to 15
+  'help|h' => sub { # print the selected POD sections to STDOUT and exit
+    pod2usage(
+      -verbose => 99,
+      -sections => 'NAME|DESCRIPTION|SYNOPSIS|ARGUMENTS|OPTIONS',
+      -msg => $VERSION_MSG,
+      -output => '-'
+    )
+  }
+) or pod2usage(2); # unknown or malformed option: print usage and exit with status 2
+
+use feature 'say';
my $corpus_c = $package_size;
my @lines = ();
@@ -74,4 +100,14 @@
$ cat corpuslist.txt | perl corpuslist_to_index > index_tasks.txt
+=head1 OPTIONS
+
+=over 2
+
+=item B<--batch|-b>
+
+Batch size, i.e. how many corpora are converted, extracted, indexed, ... at a time. Defaults to 15.
+
+=back
+
=cut