Add deflist generation

Change-Id: I28282a23e0fee65191db6f45d87a02664a19acdf
diff --git a/Makefile.PL b/Makefile.PL
index 8fa8fe0..7445d58 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -14,9 +14,11 @@
     'Test::More' => 0,
     'Test::Output' => 0,
     'File::Basename' => 0,
+    'File::Temp' => 0,
     'Data::Dumper' => 0,
     'File::Spec::Functions' => 0,
     'Mojolicious' => 0,
+    'IO::Uncompress::Bunzip2' => 0,
   },
   MIN_PERL_VERSION => '5.016',
   EXE_FILES => ['script/cosmasvc2koralquery'],
diff --git a/lib/KorAP/Def.pm b/lib/KorAP/Def.pm
index 5a4591d..b71dce8 100644
--- a/lib/KorAP/Def.pm
+++ b/lib/KorAP/Def.pm
@@ -1,5 +1,7 @@
 package KorAP::Def;
 use KorAP::VirtualCorpus::Group;
+use IO::Uncompress::Bunzip2 qw($Bunzip2Error);
+use IO::File;
 use strict;
 use warnings;
 
@@ -12,17 +14,37 @@
   if (ref $file && ref $file eq 'GLOB') {
     $self->{file} = '';
     $self->{fh} = $file;
+    return bless $self, $class;
   }
 
-  elsif ($file =~ /\.def\.bz2$/) {
+  if (!-f $file) {
+    if (-f $file . '.def') {
+      $file .= '.def';
+    }
+
+    elsif (-f $file . '.txt') {
+      $file .= '.txt';
+    }
+
+    elsif (-f $file . '.def.bz2') {
+      $file .= '.def.bz2';
+    }
+
+    else {
+      warn 'Unable to load def file from ' . $file;
+      return;
+    };
+  };
+
+  if ($file =~ /\.def\.bz2$/) {
     $self->{file} = $file;
-    # $self->{fh} = $file;
-    # TODO
+    $self->{fh} = IO::Uncompress::Bunzip2->new($file)
+      or die "bunzip2 failed: $Bunzip2Error\n";;
   }
 
   elsif (-f $file) {
     $self->{file} = $file;
- 
+
     # Open def file
     if (!open($self->{fh}, '<' . $file)) {
       warn $ARGV[0] . " can't be opened";
@@ -51,9 +73,11 @@
 
   my $frozen = 0;
 
+  my $fh = $self->{fh};
+
   # Iterate over the whole list
-  while (!eof($self->{fh})) {
-    my $line = readline($self->{fh});
+  while (!eof($fh)) {
+    my $line = <$fh>;
     chomp $line;
 
     # Skip empty lines
@@ -288,7 +312,7 @@
   };
   $self->{vc} = $vc;
 
-  close($self->{fh});
+  close($fh);
 };
 
 sub to_string {
diff --git a/lib/KorAP/DefList.pm b/lib/KorAP/DefList.pm
index 09fd1cb..076b97d 100644
--- a/lib/KorAP/DefList.pm
+++ b/lib/KorAP/DefList.pm
@@ -2,34 +2,32 @@
 use KorAP::Def;
 use KorAP::VirtualCorpus::Group;
 use KorAP::VirtualCorpus::Doc;
+use File::Spec::Functions qw!catfile!;
 use strict;
 use warnings;
 
 # Constructor
 sub new {
   my $class = shift;
-  my $file = shift;
+  my %self = @_;
 
-  warn 'xx';
+  $self{output} //= '.';
+
+  my $file = $self{file};
 
   if (ref $file && ref $file eq 'GLOB') {
-    return bless {
-      file => '',
-      fh => $file
-    }, $class;
-  };
-
-  my $self = {
-    file => $file
+    $self{file} = '';
+    $self{fh} = $file;
+    return bless \%self, $class;
   };
 
   # Open def file
-  if (!open($self->{fh}, '<' . $file)) {
-    warn $ARGV[0] . " can't be opened";
+  if (!open($self{fh}, '<' . $file)) {
+    warn $file . " can't be opened";
     exit(0);
   };
 
-  return bless $self, $class;
+  return bless \%self, $class;
 };
 
 
@@ -68,7 +66,25 @@
 sub copy_vc {
   my $self = shift;
   my $file = shift;
-  print "Convert ", $file, " from def file\n";
+
+  print "Convert ", $file, " from def-file\n";
+
+  # Open file from copy directory
+  my $def = KorAP::Def->new(catfile($self->{copy}, $file));
+
+  # Parse
+  return unless $def;
+  $def->parse or return;
+
+  # Output
+  my $out = catfile($self->{output}, $file . '.json');
+  if (open(my $koral, '>' . $out)) {
+    print $koral $def->to_string;
+    close($koral);
+    return;
+  };
+
+  warn 'Unable to write file ' . $out;
 };
 
 sub regex_to_vc {
@@ -76,7 +92,18 @@
   my ($name, $desc, $regex) = @_;
 
   print "Convert ", $name, " from regex\n";
-  return from_regex($regex, $name, $desc);
+
+  my $vc = from_regex($regex, $name, $desc);
+
+  # Output
+  my $out = catfile($self->{output}, $name . '.json');
+  if (open(my $koral, '>' . $out)) {
+    print $koral $vc->to_string;
+    close($koral);
+    return;
+  };
+
+  warn 'Unable to write file ' . $out;
 };
 
 sub from_regex {
diff --git a/script/cosmasvc2koralquery b/script/cosmasvc2koralquery
index a95605f..2e20568 100755
--- a/script/cosmasvc2koralquery
+++ b/script/cosmasvc2koralquery
@@ -19,10 +19,12 @@
 my $cmd = shift @ARGV;
 
 
-my $input;
+my ($input, $output, $copysrc);
 
 GetOptions (
-  "input|i=s" => \$input
+  "input|i=s" => \$input,
+  "output|o=s" => \$output,
+  "copy-src|c=s" => \$copysrc,
 )
 or die("Error in command line arguments\n");
 
@@ -34,7 +36,18 @@
   $ perl cosmasvc2koralquery def my_vc.txt | gzip -vc > my_vc.jsonld.gz
   $ cat my_vc.txt | perl cosmasvc2koralquery def - | gzip -vc > my_vc.jsonld.gz
 
-Commands: def, list
+Command: def
+
+  Convert a def file or a list of sigles to a KoralQuery VC.
+
+  Takes the list or def from STDIN and exports to STDOUT.
+
+Command: list
+
+  Convert a list with copy or regex instructions to KoralQuery VCs.
+
+  --output: The output directory
+  --copy-src: The directory for def files to copy
 
 HELP
 exit 1;
@@ -43,7 +56,11 @@
 
 # Process a list
 if ($cmd eq 'list') {
-  KorAP::DefList->new($input || $ARGV[0])->parse;
+  KorAP::DefList->new(
+    file => ($input || $ARGV[0]),
+    copy => ($copysrc || '.'),
+    output => ($output || '.')
+  )->parse;
   exit(0);
 };
 
diff --git a/t/data/list-example.ls b/t/data/list-example.ls
index 6878eef..4426046 100644
--- a/t/data/list-example.ls
+++ b/t/data/list-example.ls
@@ -14,7 +14,7 @@
 thm-lit	Belletristik/Trivialliteratur: Thomas-Mann-Korpus	THM/(AMB|AMD|AME|AMF|AMH|AMJ|AMK|AML|AMN|AMZ)
 wxx11	Wikipedia Artikel und Diskussionen	W[PD]D11/
 zca	Zeit Campus (Feb.-Apr.;Jun.;Aug.;Okt.-Dez.; Dez. 2013 n.v.)	ZCA[0-9][0-9]/
-corp-w-gesamt.2023-i.16.03.23	@COPY@
-corp-a	@COPY@
+list2	@COPY@
+list5	@COPY@
 misc-lit	Belletristik/Trivialliteratur (öffentlich)	(GOE/(AGD|AGM|AGN|AGV|AGW))|(MK1/(LBC|LBT|LFH|LGB|LJA|LMB|LSO|MHE|TJM|TPM))|(MK2/TRI)
 
diff --git a/t/data/list5.def.bz2 b/t/data/list5.def.bz2
new file mode 100644
index 0000000..d5215b8
--- /dev/null
+++ b/t/data/list5.def.bz2
Binary files differ
diff --git a/t/list2vc-deflist.t b/t/list2vc-deflist.t
index 41c2176..89577fb 100644
--- a/t/list2vc-deflist.t
+++ b/t/list2vc-deflist.t
@@ -5,24 +5,60 @@
 use File::Basename;
 use File::Spec::Functions;
 use Data::Dumper;
+use File::Temp qw/tempdir/;
 
 use Test::Output;
 use Mojo::JSON 'decode_json';
 
 my $script = catfile(dirname(__FILE__), '..', 'script', 'cosmasvc2koralquery');
+my $copysrc = catdir(dirname(__FILE__), 'data');
 my $list1 = catfile(dirname(__FILE__), 'data', 'list-example.ls');
+my $output = tempdir( CLEANUP => 1 );
+
+my @call = ($script, 'list', $list1, '--copy-src', $copysrc, '--output', $output);
 
 # Check STDOUT
-stdout_like(
+stderr_like(
   sub {
-    system($script, 'list', $list1);
+    system(@call);
   },
-  qr!Convert!,
+  qr!redabs is not yet supported!,
   "check stdout"
 );
 
 # Check JSON
-# my $protocol = join('', `$script list $list1`);
+my $protocol = join('', `@call`);
+
+like($protocol, qr!bih from regex!);
+ok(-f catfile($output, 'bih.json'));
+like($protocol, qr!bio from regex!);
+ok(-f catfile($output, 'bio.json'));
+like($protocol, qr!bio-pub from regex!);
+ok(-f catfile($output, 'bio-pub.json'));
+like($protocol, qr!l from regex!);
+ok(-f catfile($output, 'l.json'));
+like($protocol, qr!dpa from regex!);
+ok(-f catfile($output, 'dpa.json'));
+like($protocol, qr!fsp from regex!);
+ok(-f catfile($output, 'fsp.json'));
+like($protocol, qr!fsp-pub from regex!);
+ok(-f catfile($output, 'fsp-pub.json'));
+like($protocol, qr!kjl from regex!);
+ok(-f catfile($output, 'kjl.json'));
+like($protocol, qr!thm-lit from regex!);
+ok(-f catfile($output, 'thm-lit.json'));
+like($protocol, qr!wxx11 from regex!);
+ok(-f catfile($output, 'wxx11.json'));
+like($protocol, qr!zca from regex!);
+ok(-f catfile($output, 'zca.json'));
+like($protocol, qr!list2 from def-file!);
+ok(-f catfile($output, 'list2.json'));
+like($protocol, qr!list5 from def-file!);
+ok(-f catfile($output, 'list5.json'));
+like($protocol, qr!misc-lit from regex!);
+ok(-f catfile($output, 'misc-lit.json'));
 
 done_testing;
 __END__
+
+