| Akron | 3f875be | 2020-05-11 14:57:19 +0200 | [diff] [blame^] | 1 | #!/usr/bin/env perl | 
 | 2 | use strict; | 
 | 3 | use warnings; | 
 | 4 |  | 
 | 5 | sub shorten ($) { | 
 | 6 |   my $line = shift; | 
 | 7 |   if (length($line) < 20) { | 
 | 8 |     return $line; | 
 | 9 |   } | 
 | 10 |   else { | 
 | 11 |     return substr($line,0,17) . '...'; | 
 | 12 |   }; | 
 | 13 | }; | 
 | 14 |  | 
 | 15 |  | 
 | 16 | unless (@ARGV) { | 
 | 17 |   print <<'HELP'; | 
 | 18 | Convert a line-separated list of corpus sigles, doc sigles or | 
 | 19 | text sigles into a virtual corpus query. | 
 | 20 |  | 
 | 21 |   $ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz | 
 | 22 |  | 
 | 23 | HELP | 
 | 24 | exit 0; | 
 | 25 | }; | 
 | 26 |  | 
 | 27 | my $fh; | 
 | 28 | if (open($fh, '<' . $ARGV[0])) { | 
 | 29 |   my %data = ( | 
 | 30 |     corpus => [], | 
 | 31 |     doc => [], | 
 | 32 |     text => [] | 
 | 33 |   ); | 
 | 34 |  | 
 | 35 |   # Iterate over the whole list | 
 | 36 |   while (!eof $fh) { | 
 | 37 |     my $line = readline($fh); | 
 | 38 |     chomp $line; | 
 | 39 |  | 
 | 40 |     # Get text sigles | 
 | 41 |     if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) { | 
 | 42 |       push @{$data{text}}, $line; | 
 | 43 |     } | 
 | 44 |  | 
 | 45 |     # Get doc sigles | 
 | 46 |     elsif ($line =~ m!^[^\/]+\/[^\/]+$!) { | 
 | 47 |       push @{$data{doc}}, $line; | 
 | 48 |     } | 
 | 49 |  | 
 | 50 |     # Get corpus sigles | 
 | 51 |     elsif ($line !~ m!\/!) { | 
 | 52 |       push @{$data{corpus}}, $line; | 
 | 53 |     } | 
 | 54 |  | 
 | 55 |     else { | 
 | 56 |       warn shorten($line) . q! isn't a valid sigle!; | 
 | 57 |     }; | 
 | 58 |   }; | 
 | 59 |  | 
 | 60 |   # Create collection object | 
 | 61 |   my $json = '{'; | 
 | 62 |   $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",'; | 
 | 63 |   $json .= '"collection":{'; | 
 | 64 |  | 
 | 65 |   unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) { | 
 | 66 |     $json .= '}}'; | 
 | 67 |     close($fh); | 
 | 68 |     print $json; | 
 | 69 |     exit(0); | 
 | 70 |   }; | 
 | 71 |  | 
 | 72 |   $json .= '"@type":"koral:docGroup",'; | 
 | 73 |   $json .= '"operation":"operation:or",'; | 
 | 74 |   $json .= '"operands":['; | 
 | 75 |  | 
 | 76 |   foreach my $type (qw/corpus doc text/) { | 
 | 77 |     unless (@{$data{$type}}) { | 
 | 78 |       next; | 
 | 79 |     }; | 
 | 80 |     $json .= '{'; | 
 | 81 |     $json .= '"@type":"koral:doc",'; | 
 | 82 |     $json .= '"key":"' . $type . 'Sigle",'; | 
 | 83 |     $json .= '"match":"match:eq",'; | 
 | 84 |     $json .= '"value":['; | 
 | 85 |     $json .= join ',', map { '"' . $_ . '"' } @{$data{$type}}; | 
 | 86 |     $json .=  ']'; | 
 | 87 |     $json .= '},'; | 
 | 88 |   }; | 
 | 89 |  | 
 | 90 |   # Remove the last comma | 
 | 91 |   chop $json; | 
 | 92 |  | 
 | 93 |   $json .= ']}}'; | 
 | 94 |  | 
 | 95 |   close($fh); | 
 | 96 |  | 
 | 97 |   print $json; | 
 | 98 | } else { | 
 | 99 |   warn $ARGV[0] . " can't be opened"; | 
 | 100 | }; | 
 | 101 |  |