blob: cc027463fce3b870c0aa8e6bf344f597e5b914cc [file] [log] [blame]
#!/usr/bin/env perl
# list2vc.pl - convert a line-separated list of corpus sigles, doc
# sigles or text sigles into a virtual corpus query.
use strict;
use warnings;

# Command line arguments; the first one names the input list
our @ARGV;

# Abbreviate a line for use in diagnostic messages.
#
# Takes a string and returns it unchanged when it has at most 20
# characters. Longer strings are cut to their first 17 characters
# followed by '...', so the result never exceeds 20 characters.
sub shorten {
  my ($line) = @_;

  # A line of exactly 20 characters already fits: truncating it would
  # drop text without making the result any shorter
  return $line if length($line) <= 20;

  return substr($line, 0, 17) . '...';
}


# Called without arguments: print the usage text and stop successfully
unless (@ARGV) {
  print <<'HELP';
Convert a line-separated list of corpus sigles, doc sigles or
text sigles into a virtual corpus query.

  $ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz
  $ cat my_vc.txt | perl list2vc.pl - | gzip -vc > my_vc.jsonld.gz

HELP
  exit 0;
}

# Input handle for the sigle list: STDIN when the first argument is
# '-', otherwise the file named by the first argument
my $fh;
if ($ARGV[0] eq '-') {
  $fh = \*STDIN;
}

# The three-argument form of open treats the argument strictly as a
# file name, so characters like '>' or '|' in it can't alter the mode
elsif (!open($fh, '<', $ARGV[0])) {
  warn $ARGV[0] . " can't be opened: $!";

  # Report the failure through a non-zero exit status
  exit(1);
}


# Sigles collected from the input, grouped by their level
my %data = (
  corpus => [],
  doc    => [],
  text   => []
);

# Classify a sigle by its number of non-empty, slash-separated parts.
#
# Returns 'text' for three parts, 'doc' for two parts and 'corpus'
# for a single part without any slash. Returns undef for everything
# else (empty string, empty parts or more than three parts).
sub sigle_type {
  my ($sigle) = @_;

  return 'text'   if $sigle =~ m!\A[^/]+/[^/]+/[^/]+\z!;
  return 'doc'    if $sigle =~ m!\A[^/]+/[^/]+\z!;
  return 'corpus' if $sigle =~ m!\A[^/]+\z!;
  return;
}

# Iterate over the whole list
while (defined(my $line = <$fh>)) {

  # Remove the line ending (including the CR of CRLF files) and any
  # other surrounding whitespace, so it can't end up inside a sigle
  $line =~ s/\A\s+//;
  $line =~ s/\s+\z//;

  # Blank lines carry no sigle and must not be collected as
  # empty corpus sigles
  next if $line eq '';

  my $type = sigle_type($line);

  if (defined $type) {
    push @{$data{$type}}, $line;
  }
  else {
    warn shorten($line) . q! isn't a valid sigle!;
  }
}

# Quote a string as a JSON string literal.
#
# Escapes double quotes, backslashes and control characters, so that
# the result is valid JSON for any input string. Returns the literal
# including the surrounding double quotes.
sub json_string {
  my ($text) = @_;

  # Backslashes and quotes first, as the control character escapes
  # below introduce backslashes of their own
  $text =~ s/(["\\])/\\$1/g;
  $text =~ s/([\x00-\x1f])/sprintf('\\u%04x', ord($1))/ge;

  return '"' . $text . '"';
}

# Create the collection object for the given sigles.
#
# Takes a hash reference with the keys 'corpus', 'doc' and 'text',
# each holding an array reference of sigles. Returns the query as a
# JSON string: an or-group with one operand per non-empty sigle type,
# or an empty collection if there are no sigles at all.
sub collection_json {
  my ($sigles) = @_;

  my @operands;
  foreach my $type (qw/corpus doc text/) {
    my @values = @{ $sigles->{$type} || [] };
    next unless @values;

    push @operands, '{'
      . '"@type":"koral:doc",'
      . '"key":"' . $type . 'Sigle",'
      . '"match":"match:eq",'
      . '"value":[' . join(',', map { json_string($_) } @values) . ']'
      . '}';
  }

  # Without any operand the collection stays an empty object
  my $collection = '';
  if (@operands) {
    $collection = '"@type":"koral:docGroup",'
      . '"operation":"operation:or",'
      . '"operands":[' . join(',', @operands) . ']';
  }

  return '{'
    . '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",'
    . '"collection":{' . $collection . '}'
    . '}';
}

# Serialize the collected sigles, release the input and emit the query
my $json = collection_json(\%data);

close($fh);

print $json;