Test VC conversion tool and support STDIN
Change-Id: Ifbbe25115820733af20fa59d76d12d6ac98be1b1
diff --git a/tools/list2vc.pl b/tools/list2vc.pl
old mode 100644
new mode 100755
index 9a5f1e6..cc02746
--- a/tools/list2vc.pl
+++ b/tools/list2vc.pl
@@ -2,6 +2,8 @@
use strict;
use warnings;
+our @ARGV;
+
sub shorten ($) {
my $line = shift;
if (length($line) < 20) {
@@ -19,83 +21,88 @@
text sigles into a virtual corpus query.
$ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz
+ $ cat my_vc.txt | perl list2vc.pl - | gzip -vc > my_vc.jsonld.gz
HELP
exit 0;
};
my $fh;
-if (open($fh, '<' . $ARGV[0])) {
- my %data = (
- corpus => [],
- doc => [],
- text => []
- );
-
- # Iterate over the whole list
- while (!eof $fh) {
- my $line = readline($fh);
- chomp $line;
-
- # Get text sigles
- if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) {
- push @{$data{text}}, $line;
- }
-
- # Get doc sigles
- elsif ($line =~ m!^[^\/]+\/[^\/]+$!) {
- push @{$data{doc}}, $line;
- }
-
- # Get corpus sigles
- elsif ($line !~ m!\/!) {
- push @{$data{corpus}}, $line;
- }
-
- else {
- warn shorten($line) . q! isn't a valid sigle!;
- };
- };
-
- # Create collection object
- my $json = '{';
- $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
- $json .= '"collection":{';
-
- unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) {
- $json .= '}}';
- close($fh);
- print $json;
- exit(0);
- };
-
- $json .= '"@type":"koral:docGroup",';
- $json .= '"operation":"operation:or",';
- $json .= '"operands":[';
-
- foreach my $type (qw/corpus doc text/) {
- unless (@{$data{$type}}) {
- next;
- };
- $json .= '{';
- $json .= '"@type":"koral:doc",';
- $json .= '"key":"' . $type . 'Sigle",';
- $json .= '"match":"match:eq",';
- $json .= '"value":[';
- $json .= join ',', map { '"' . $_ . '"' } @{$data{$type}};
- $json .= ']';
- $json .= '},';
- };
-
- # Remove the last comma
- chop $json;
-
- $json .= ']}}';
-
- close($fh);
-
- print $json;
-} else {
+if ($ARGV[0] eq '-') { # a lone '-' selects STDIN as the source of the sigle list
+ $fh = *STDIN;
+} elsif (!open($fh, '<', $ARGV[0])) { # 3-arg open: the file name is never parsed as a mode
warn $ARGV[0] . " can't be opened";
+ exit(1); # non-zero status so callers can detect the unreadable input
};
+
+my %data = ( # sigles read from the input, grouped by level
+ corpus => [], # lines without any slash
+ doc => [], # lines of the form a/b
+ text => [] # lines of the form a/b/c
+);
+
+# Classify every line of the input list as a text, doc or corpus sigle
+while (!eof $fh) {
+ my $line = readline($fh);
+ chomp $line;
+ next unless $line =~ m!\S!; # skip blank lines; they would otherwise be stored as an empty corpus sigle
+ # Get text sigles
+ if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) {
+ push @{$data{text}}, $line;
+ }
+
+ # Get doc sigles
+ elsif ($line =~ m!^[^\/]+\/[^\/]+$!) {
+ push @{$data{doc}}, $line;
+ }
+
+ # Get corpus sigles
+ elsif ($line !~ m!\/!) {
+ push @{$data{corpus}}, $line;
+ }
+
+ else { # anything else (e.g. empty segments or too many slashes) is reported and dropped
+ warn shorten($line) . q! isn't a valid sigle!;
+ };
+};
+
+# Serialize the collected sigles as a KoralQuery collection and print it to STDOUT
+my $json = '{';
+$json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
+$json .= '"collection":{';
+# No sigle was collected at all: emit an empty collection object and stop
+unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) {
+ $json .= '}}';
+ close($fh);
+ print $json;
+ exit(0);
+};
+
+$json .= '"@type":"koral:docGroup",';
+$json .= '"operation":"operation:or",';
+$json .= '"operands":[';
+
+foreach my $type (qw/corpus doc text/) { # one koral:doc operand per non-empty sigle level
+ unless (@{$data{$type}}) {
+ next;
+ };
+ $json .= '{';
+ $json .= '"@type":"koral:doc",';
+ $json .= '"key":"' . $type . 'Sigle",';
+ $json .= '"match":"match:eq",';
+ $json .= '"value":[';
+ $json .= join ',', map { (my $v = $_) =~ s/(["\\])/\\$1/g; '"' . $v . '"' } @{$data{$type}}; # escape \ and " to keep the JSON valid
+ $json .= ']';
+ $json .= '},';
+};
+
+# Remove the trailing comma left by the last operand (at least one operand exists here)
+chop $json;
+
+$json .= ']}}';
+
+close($fh);
+
+print $json;
+