Test VC conversion tool and support STDIN
Change-Id: Ifbbe25115820733af20fa59d76d12d6ac98be1b1
diff --git a/tools/list2vc.pl b/tools/list2vc.pl
old mode 100644
new mode 100755
index 9a5f1e6..cc02746
--- a/tools/list2vc.pl
+++ b/tools/list2vc.pl
@@ -2,6 +2,8 @@
use strict;
use warnings;
+our @ARGV;
+
sub shorten ($) {
my $line = shift;
if (length($line) < 20) {
@@ -19,83 +21,88 @@
text sigles into a virtual corpus query.
$ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz
+ $ cat my_vc.txt | perl list2vc.pl - | gzip -vc > my_vc.jsonld.gz
HELP
exit 0;
};
my $fh;
-if (open($fh, '<' . $ARGV[0])) {
- my %data = (
- corpus => [],
- doc => [],
- text => []
- );
-
- # Iterate over the whole list
- while (!eof $fh) {
- my $line = readline($fh);
- chomp $line;
-
- # Get text sigles
- if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) {
- push @{$data{text}}, $line;
- }
-
- # Get doc sigles
- elsif ($line =~ m!^[^\/]+\/[^\/]+$!) {
- push @{$data{doc}}, $line;
- }
-
- # Get corpus sigles
- elsif ($line !~ m!\/!) {
- push @{$data{corpus}}, $line;
- }
-
- else {
- warn shorten($line) . q! isn't a valid sigle!;
- };
- };
-
- # Create collection object
- my $json = '{';
- $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
- $json .= '"collection":{';
-
- unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) {
- $json .= '}}';
- close($fh);
- print $json;
- exit(0);
- };
-
- $json .= '"@type":"koral:docGroup",';
- $json .= '"operation":"operation:or",';
- $json .= '"operands":[';
-
- foreach my $type (qw/corpus doc text/) {
- unless (@{$data{$type}}) {
- next;
- };
- $json .= '{';
- $json .= '"@type":"koral:doc",';
- $json .= '"key":"' . $type . 'Sigle",';
- $json .= '"match":"match:eq",';
- $json .= '"value":[';
- $json .= join ',', map { '"' . $_ . '"' } @{$data{$type}};
- $json .= ']';
- $json .= '},';
- };
-
- # Remove the last comma
- chop $json;
-
- $json .= ']}}';
-
- close($fh);
-
- print $json;
-} else {
+if ($ARGV[0] eq '-') { # A single dash means: read the list from STDIN
+  $fh = *STDIN;
+} elsif (!open($fh, '<', $ARGV[0])) { # 3-arg open: the name can't alter the mode
warn $ARGV[0] . " can't be opened";
+  exit(1); # Non-zero status, so callers can detect the failure
};
+
+# Sigles collected from the list, grouped by their granularity
+my %data = (
+  corpus => [],
+  doc => [],
+  text => []
+);
+
+# Iterate over the whole list, expecting one sigle per line
+while (defined(my $line = readline($fh))) {
+  # Strip LF/CRLF line endings and skip blank lines, which would
+  # otherwise be collected as empty corpus sigles
+  $line =~ s/[\r\n]+\z//;
+  next if $line eq '';
+
+  # Get text sigles (three slash-separated segments)
+  if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) {
+    push @{$data{text}}, $line;
+  }
+  # Get doc sigles (two slash-separated segments)
+  elsif ($line =~ m!^[^\/]+\/[^\/]+$!) {
+    push @{$data{doc}}, $line;
+  }
+  # Get corpus sigles (no slash at all)
+  elsif ($line !~ m!\/!) {
+    push @{$data{corpus}}, $line;
+  }
+  else {
+    warn shorten($line) . q! isn't a valid sigle!;
+  };
+};
+
+# The list is fully consumed at this point, so release the handle
+close($fh);
+
+# Create collection object
+my $json = '{';
+$json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
+$json .= '"collection":{';
+
+# Without any valid sigle the collection stays empty
+unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) {
+  print $json . '}}';
+  exit(0);
+};
+
+$json .= '"@type":"koral:docGroup",';
+$json .= '"operation":"operation:or",';
+$json .= '"operands":[';
+
+# One koral:doc operand per sigle type that occurred in the list
+my @operands;
+foreach my $type (qw/corpus doc text/) {
+  next unless @{$data{$type}};
+
+  # Escape quotes, backslashes and control characters, so that
+  # unusual sigles can't break the JSON serialization
+  my @values = map {
+    (my $value = $_) =~ s/(["\\])/\\$1/g;
+    $value =~ s/([\x00-\x1f])/sprintf('\\u%04x', ord($1))/ge;
+    '"' . $value . '"';
+  } @{$data{$type}};
+
+  push @operands, '{"@type":"koral:doc","key":"' . $type . 'Sigle",' .
+    '"match":"match:eq","value":[' . join(',', @values) . ']}';
+};
+
+$json .= join(',', @operands) . ']}}';
+
+print $json;
+
diff --git a/tools/t/data/list1.txt b/tools/t/data/list1.txt
new file mode 100644
index 0000000..001f85c
--- /dev/null
+++ b/tools/t/data/list1.txt
@@ -0,0 +1,6 @@
+A02
+A01/B02/c04
+A03
+B04/X02
+B04/X03
+A01/B02/c05
diff --git a/tools/t/list2vc.t b/tools/t/list2vc.t
new file mode 100644
index 0000000..93b5226
--- /dev/null
+++ b/tools/t/list2vc.t
@@ -0,0 +1,52 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Test::More;
+use File::Basename;
+use File::Spec::Functions;
+
+use Test::Output;
+use Mojo::JSON 'decode_json';
+
+my $script = catfile(dirname(__FILE__), '..', 'list2vc.pl');
+my $list1 = catfile(dirname(__FILE__), 'data', 'list1.txt');
+
+# Run the script with the current perl (no shell) and return its STDOUT
+sub list2vc {
+  open(my $pipe, '-|', $^X, $script, @_) or die "Unable to run $script: $!";
+  local $/;
+  return scalar <$pipe>;
+};
+
+# Check STDOUT
+stdout_like(
+  sub { system($^X, $script, $list1) },
+  qr!^\{\"\@context\".+?\}$!,
+  "check stdout"
+);
+
+# Check JSON
+my $json = decode_json(list2vc($list1));
+is($json->{'collection'}->{'@type'}, 'koral:docGroup', 'type');
+is($json->{'collection'}->{'operation'}, 'operation:or', 'operation');
+
+# Operands are expected in the order corpus, doc, text
+my @expected = (
+  [corpusSigle => ["A02","A03"]],
+  [docSigle => ["B04/X02","B04/X03"]],
+  [textSigle => ["A01/B02/c04","A01/B02/c05"]]
+);
+foreach my $i (0..$#expected) {
+  my ($key, $value) = @{$expected[$i]};
+  my $op = $json->{'collection'}->{'operands'}->[$i];
+  is($op->{'@type'}, 'koral:doc', "$key type");
+  is($op->{'key'}, $key, "$key key");
+  is($op->{'match'}, 'match:eq', "$key match");
+  is_deeply($op->{'value'}, $value, "$key value");
+};
+
+# Check STDIN (the child process inherits the reopened handle)
+open(STDIN, '<', $list1) or die "Unable to open $list1: $!";
+is_deeply(decode_json(list2vc('-')), $json, 'STDIN yields the same VC');
+
+done_testing;