blob: 9a5f1e67e556185dbcb3643981b9fe4cc7968766 [file] [log] [blame]
Akron3f875be2020-05-11 14:57:19 +02001#!/usr/bin/env perl
2use strict;
3use warnings;
4
# Shorten a line so it can be quoted in a diagnostic message.
#
# Takes one string. Returns it unchanged when it is at most 20 characters
# long; otherwise returns its first 17 characters followed by '...', so
# the result is never longer than 20 characters.
#
# The ($) prototype is kept so that existing call sites parse as before.
sub shorten ($) {
  my $line = shift;

  # A line of exactly 20 characters already fits; cutting it down to
  # 17 characters plus '...' would drop text without saving any space.
  return $line if length($line) <= 20;

  return substr($line, 0, 17) . '...';
};
14
15
# Without a file argument there is nothing to convert:
# print the usage text and leave with a success status.
if (!@ARGV) {
  my $usage = <<'HELP';
Convert a line-separated list of corpus sigles, doc sigles or
text sigles into a virtual corpus query.

 $ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz

HELP
  print $usage;
  exit 0;
};
26
# Quote a string as a JSON string literal.
#
# Backslashes and double quotes are backslash-escaped and control
# characters (U+0000 to U+001F) are written as \uXXXX, so that any sigle
# read from the input yields valid JSON.
sub _json_string {
  my $str = shift;

  # Escape the two characters that would terminate or alter the literal
  $str =~ s/(["\\])/\\$1/g;

  # Control characters must not appear unescaped inside a JSON string
  $str =~ s/([\x00-\x1f])/sprintf('\\u%04x', ord($1))/ge;

  return '"' . $str . '"';
};

# Read the sigle list named by the first command line argument and print
# a KoralQuery virtual corpus (JSON-LD) to STDOUT.
#
# Every non-blank line is sorted by its number of slash-separated parts:
# three parts form a text sigle, two a doc sigle, one a corpus sigle.
# Anything else is reported on STDERR and skipped.
my $fh;

# Three-argument open keeps characters in the file name from being
# interpreted as part of the open mode. A file that can't be read is
# fatal, so the caller sees a non-zero exit status and the reason.
open($fh, '<', $ARGV[0])
  or die $ARGV[0] . " can't be opened: $!";

my %data = (
  corpus => [],
  doc    => [],
  text   => []
);

# Iterate over the whole list
while (defined(my $line = <$fh>)) {
  chomp $line;

  # Drop the carriage return left over from CRLF line endings
  $line =~ s/\r\z//;

  # Blank lines carry no sigle and must not end up as empty values
  next unless $line =~ /\S/;

  # Get text sigles
  if ($line =~ m!\A(?:[^/]+/){2}[^/]+\z!) {
    push @{$data{text}}, $line;
  }

  # Get doc sigles
  elsif ($line =~ m!\A[^/]+/[^/]+\z!) {
    push @{$data{doc}}, $line;
  }

  # Get corpus sigles
  elsif ($line !~ m!/!) {
    push @{$data{corpus}}, $line;
  }

  # Too many parts or empty parts
  else {
    warn shorten($line) . q! isn't a valid sigle!;
  };
};

close($fh);

# Create one operand per sigle type that occurs in the list
my @operands;
foreach my $type (qw/corpus doc text/) {
  next unless @{$data{$type}};

  push @operands,
    '{'
    . '"@type":"koral:doc",'
    . '"key":"' . $type . 'Sigle",'
    . '"match":"match:eq",'
    . '"value":[' . join(',', map { _json_string($_) } @{$data{$type}}) . ']'
    . '}';
};

# Create collection object
my $json = '{';
$json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
$json .= '"collection":{';

# Without any sigles the collection stays an empty object
if (@operands) {
  $json .= '"@type":"koral:docGroup",';
  $json .= '"operation":"operation:or",';
  $json .= '"operands":[' . join(',', @operands) . ']';
};

$json .= '}}';

print $json;
101