Test VC conversion tool and support STDIN

Change-Id: Ifbbe25115820733af20fa59d76d12d6ac98be1b1
diff --git a/list2vc.pl b/list2vc.pl
old mode 100644
new mode 100755
index 9a5f1e6..cc02746
--- a/list2vc.pl
+++ b/list2vc.pl
@@ -2,6 +2,8 @@
 use strict;
 use warnings;
 
+our @ARGV;
+
 sub shorten ($) {
   my $line = shift;
   if (length($line) < 20) {
@@ -19,83 +21,88 @@
 text sigles into a virtual corpus query.
 
   $ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz
+  $ cat my_vc.txt | perl list2vc.pl - | gzip -vc > my_vc.jsonld.gz
 
 HELP
 exit 0;
 };
 
 my $fh;
-if (open($fh, '<' . $ARGV[0])) {
-  my %data = (
-    corpus => [],
-    doc => [],
-    text => []
-  );
-
-  # Iterate over the whole list
-  while (!eof $fh) {
-    my $line = readline($fh);
-    chomp $line;
-
-    # Get text sigles
-    if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) {
-      push @{$data{text}}, $line;
-    }
-
-    # Get doc sigles
-    elsif ($line =~ m!^[^\/]+\/[^\/]+$!) {
-      push @{$data{doc}}, $line;
-    }
-
-    # Get corpus sigles
-    elsif ($line !~ m!\/!) {
-      push @{$data{corpus}}, $line;
-    }
-
-    else {
-      warn shorten($line) . q! isn't a valid sigle!;
-    };
-  };
-
-  # Create collection object
-  my $json = '{';
-  $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
-  $json .= '"collection":{';
-
-  unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) {
-    $json .= '}}';
-    close($fh);
-    print $json;
-    exit(0);
-  };
-
-  $json .= '"@type":"koral:docGroup",';
-  $json .= '"operation":"operation:or",';
-  $json .= '"operands":[';
-
-  foreach my $type (qw/corpus doc text/) {
-    unless (@{$data{$type}}) {
-      next;
-    };
-    $json .= '{';
-    $json .= '"@type":"koral:doc",';
-    $json .= '"key":"' . $type . 'Sigle",';
-    $json .= '"match":"match:eq",';
-    $json .= '"value":[';
-    $json .= join ',', map { '"' . $_ . '"' } @{$data{$type}};
-    $json .=  ']';
-    $json .= '},';
-  };
-
-  # Remove the last comma
-  chop $json;
-
-  $json .= ']}}';
-
-  close($fh);
-
-  print $json;
-} else {
+if ($ARGV[0] eq '-') {                  # '-' selects STDIN as the source of the sigle list
+  $fh = *STDIN;
+} elsif (!open($fh, '<', $ARGV[0])) {   # 3-arg open: the file name can't smuggle in a mode or pipe
   warn $ARGV[0] . " can't be opened";
+  exit(1);                              # non-zero status, so callers and pipelines notice the failure
 };
 
+
+my %data = (
+  corpus => [],
+  doc => [],
+  text => []
+);
+
+# Iterate over the whole list and sort every sigle into its level
+while (defined(my $line = readline($fh))) {
+  $line =~ s/[\r\n]+\z//;   # strip LF as well as CRLF line endings
+  next if $line !~ m!\S!;   # skip blank lines instead of storing an empty corpus sigle
+
+  # Get text sigles
+  if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) {
+    push @{$data{text}}, $line;
+  }
+
+  # Get doc sigles
+  elsif ($line =~ m!^[^\/]+\/[^\/]+$!) {
+    push @{$data{doc}}, $line;
+  }
+
+  # Get corpus sigles
+  elsif ($line !~ m!\/!) {
+    push @{$data{corpus}}, $line;
+  }
+
+  else {
+    warn shorten($line) . q! isn't a valid sigle!;
+  };
+};
+
+# Create collection object
+my $json = '{';
+$json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
+$json .= '"collection":{';
+
+unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) {
+  $json .= '}}';
+  close($fh);
+  print $json;
+  exit(0);
+};
+
+$json .= '"@type":"koral:docGroup",';
+$json .= '"operation":"operation:or",';
+$json .= '"operands":[';
+
+foreach my $type (qw/corpus doc text/) {
+  next unless @{$data{$type}};
+
+  $json .= '{';
+  $json .= '"@type":"koral:doc",';
+  $json .= '"key":"' . $type . 'Sigle",';
+  $json .= '"match":"match:eq",';
+  $json .= '"value":[';
+  # Escape backslashes and double quotes, so such sigles can't break the JSON string
+  $json .= join ',', map { my $v = $_; $v =~ s/(["\\])/\\$1/g; '"' . $v . '"' } @{$data{$type}};
+  $json .=  ']';
+  $json .= '},';
+};
+
+# Remove the last comma
+chop $json;
+
+$json .= ']}}';
+
+close($fh);
+
+print $json;
+
diff --git a/t/data/list1.txt b/t/data/list1.txt
new file mode 100644
index 0000000..001f85c
--- /dev/null
+++ b/t/data/list1.txt
@@ -0,0 +1,6 @@
+A02
+A01/B02/c04
+A03
+B04/X02
+B04/X03
+A01/B02/c05
diff --git a/t/list2vc.t b/t/list2vc.t
new file mode 100644
index 0000000..93b5226
--- /dev/null
+++ b/t/list2vc.t
@@ -0,0 +1,52 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Test::More;
+use File::Basename;
+use File::Spec::Functions;
+
+use Test::Output;
+use Mojo::JSON 'decode_json';
+
+my $script = catfile(dirname(__FILE__), '..', 'list2vc.pl');
+my $list1 = catfile(dirname(__FILE__), 'data', 'list1.txt');
+
+# Check STDOUT (run the script with the perl executing this test, see $^X)
+stdout_like(
+  sub {
+    system($^X, $script, $list1);
+  },
+  qr!^\{\"\@context\".+?\}$!,
+  "check stdout"
+);
+
+# Check JSON structure generated from the file argument
+my $json = decode_json(join('', `$^X $script $list1`));
+
+is($json->{'collection'}->{'@type'}, 'koral:docGroup', 'collection type');
+is($json->{'collection'}->{'operation'}, 'operation:or', 'collection operation');
+
+my $op1 = $json->{'collection'}->{'operands'}->[0];
+is($op1->{'@type'}, 'koral:doc', 'corpus operand type');
+is($op1->{'key'}, 'corpusSigle', 'corpus operand key');
+is($op1->{'match'}, 'match:eq', 'corpus operand match');
+is_deeply($op1->{'value'}, ["A02","A03"], 'corpus operand value');
+
+my $op2 = $json->{'collection'}->{'operands'}->[1];
+is($op2->{'@type'}, 'koral:doc', 'doc operand type');
+is($op2->{'key'}, 'docSigle', 'doc operand key');
+is($op2->{'match'}, 'match:eq', 'doc operand match');
+is_deeply($op2->{'value'}, ["B04/X02","B04/X03"], 'doc operand value');
+
+my $op3 = $json->{'collection'}->{'operands'}->[2];
+is($op3->{'@type'}, 'koral:doc', 'text operand type');
+is($op3->{'key'}, 'textSigle', 'text operand key');
+is($op3->{'match'}, 'match:eq', 'text operand match');
+is_deeply($op3->{'value'}, ["A01/B02/c04","A01/B02/c05"], 'text operand value');
+
+
+# Check STDIN: piping the list in must yield the same structure as the file argument
+my $json2 = decode_json(join('', `cat $list1 | $^X $script -`));
+is_deeply($json2, $json, 'STDIN input yields the same JSON as file input');
+
+done_testing;