Added tool to create VC from list of sigles

Change-Id: I9903f70275c4ddaa09046dfa0d9553672d606ce8
diff --git a/.gitignore b/.gitignore
index bd4989d..752da96 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 *.iml
 dependency-reduced-pom.xml
 admin_token
+/sandbox/
 /bin/
 /db.sqlite
 /lite/liteDB.sqlite
diff --git a/core/Changes b/core/Changes
index bec0bd2..12117b3 100644
--- a/core/Changes
+++ b/core/Changes
@@ -2,7 +2,9 @@
 24/01/2020
    - Removed salt from config and updated config files.
 05/02/2020
-   - Added welcome page.   
+   - Added welcome page.
+11/05/2020
+   - Added tool to create VC from list (diewald)
 
 # version 0.62.3
 03/12/2019
diff --git a/tools/list2vc.pl b/tools/list2vc.pl
new file mode 100644
index 0000000..9a5f1e6
--- /dev/null
+++ b/tools/list2vc.pl
@@ -0,0 +1,101 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+sub shorten ($) {
+  my $line = shift;
+  if (length($line) < 20) {
+    return $line;
+  }
+  else {
+    return substr($line,0,17) . '...';
+  };
+};
+
+
+unless (@ARGV) {
+  print <<'HELP';
+Convert a line-separated list of corpus sigles, doc sigles or
+text sigles into a virtual corpus query.
+
+  $ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz
+
+HELP
+exit 0;
+};
+
+my $fh;
+if (open($fh, '<' . $ARGV[0])) {
+  my %data = (
+    corpus => [],
+    doc => [],
+    text => []
+  );
+
+  # Iterate over the whole list
+  while (!eof $fh) {
+    my $line = readline($fh);
+    chomp $line;
+
+    # Get text sigles
+    if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) {
+      push @{$data{text}}, $line;
+    }
+
+    # Get doc sigles
+    elsif ($line =~ m!^[^\/]+\/[^\/]+$!) {
+      push @{$data{doc}}, $line;
+    }
+
+    # Get corpus sigles
+    elsif ($line !~ m!\/!) {
+      push @{$data{corpus}}, $line;
+    }
+
+    else {
+      warn shorten($line) . q! isn't a valid sigle!;
+    };
+  };
+
+  # Create collection object
+  my $json = '{';
+  $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
+  $json .= '"collection":{';
+
+  unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) {
+    $json .= '}}';
+    close($fh);
+    print $json;
+    exit(0);
+  };
+
+  $json .= '"@type":"koral:docGroup",';
+  $json .= '"operation":"operation:or",';
+  $json .= '"operands":[';
+
+  foreach my $type (qw/corpus doc text/) {
+    unless (@{$data{$type}}) {
+      next;
+    };
+    $json .= '{';
+    $json .= '"@type":"koral:doc",';
+    $json .= '"key":"' . $type . 'Sigle",';
+    $json .= '"match":"match:eq",';
+    $json .= '"value":[';
+    $json .= join ',', map { '"' . $_ . '"' } @{$data{$type}};
+    $json .=  ']';
+    $json .= '},';
+  };
+
+  # Remove the last comma
+  chop $json;
+
+  $json .= ']}}';
+
+  close($fh);
+
+  print $json;
+} else {
+  warn $ARGV[0] . " can't be opened";
+};
+