blob: c632ec7cb15577d2096c3937fbb9a90628ce2537 [file] [log] [blame]
Akron18e407a2020-05-11 14:57:19 +02001#!/usr/bin/env perl
Akron23e9e3c2020-05-20 12:37:25 +02002
3
4
5package main;
Akron18e407a2020-05-11 14:57:19 +02006use strict;
7use warnings;
8
Akron1e6f4d42020-05-19 12:14:41 +02009our @ARGV;
10
# Abbreviate a string for use in diagnostic messages.
#
# A string of fewer than 20 characters is returned unchanged. Anything
# longer is reduced to its first 17 characters followed by '...'.
sub shorten ($) {
  my ($text) = @_;

  return length($text) >= 20
    ? substr($text, 0, 17) . '...'
    : $text;
}
20
21
# Called without any argument: print a short usage note to STDOUT
# and leave with a success status.
if (!@ARGV) {
  print <<'HELP';
Convert a line-separated list of corpus sigles, doc sigles or
text sigles into a virtual corpus query.

  $ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz
  $ cat my_vc.txt | perl list2vc.pl - | gzip -vc > my_vc.jsonld.gz

HELP
  exit 0;
}
33
# Select the input handle: '-' reads from STDIN, any other argument
# is taken as the path of a file to read.
my $fh;
if ($ARGV[0] eq '-') {
  $fh = *STDIN;
}

# The three-argument form of open keeps characters like '>' or '|'
# in the file name from being interpreted as an open mode.
elsif (!open($fh, '<', $ARGV[0])) {
  # Report the system error and signal the failure to the caller
  # with a non-zero exit status.
  warn $ARGV[0] . " can't be opened: $!";
  exit(1);
}
41
Akron1e6f4d42020-05-19 12:14:41 +020042
# Sigles read from the input, grouped by their level
my %data = (
  corpus => [],
  doc    => [],
  text   => []
);

# Iterate over the whole list, one sigle per line
while (defined(my $line = readline($fh))) {
  chomp $line;

  # Drop the carriage return left over from CRLF line endings
  $line =~ s/\r\z//;

  # Skip empty and whitespace-only lines. This is tested by pattern
  # and not by truthiness, so a line consisting of "0" is kept.
  next if $line =~ /\A\s*\z/;

  # Get text sigles (three segments separated by slashes)
  if ($line =~ m{\A(?:[^/]+/){2}[^/]+\z}) {
    push @{$data{text}}, $line;
  }

  # Get doc sigles (two segments separated by a slash)
  elsif ($line =~ m{\A[^/]+/[^/]+\z}) {
    push @{$data{doc}}, $line;
  }

  # Get corpus sigles (no slash at all)
  elsif ($line !~ m{/}) {
    push @{$data{corpus}}, $line;
  }

  # Everything else (too many or empty segments) is reported on
  # STDERR and ignored
  else {
    warn shorten($line) . q! isn't a valid sigle!;
  }
}
80
81# Create collection object
# Create collection object
my $json = '{';
$json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
$json .= '"collection":{';

# No sigle was collected: emit an empty collection and stop
unless (@{$data{corpus}} || @{$data{doc}} || @{$data{text}}) {
  $json .= '}}';
  close($fh);
  print $json;
  exit(0);
}

$json .= '"@type":"koral:docGroup",';
$json .= '"operation":"operation:or",';
$json .= '"operands":[';

# Build one koral:doc operand per sigle level that has entries
my @operands;
foreach my $type (qw/corpus doc text/) {
  next unless @{$data{$type}};

  # Quote each sigle as a JSON string. Quotes, backslashes and
  # control characters are escaped so the output stays valid JSON
  # whatever the input contains.
  my @values = map {
    my $value = $_;
    $value =~ s/(["\\])/\\$1/g;
    $value =~ s/([\x00-\x1f])/sprintf('\\u%04x', ord($1))/ge;
    '"' . $value . '"';
  } @{$data{$type}};

  push @operands,
    '{'
    . '"@type":"koral:doc",'
    . '"key":"' . $type . 'Sigle",'
    . '"match":"match:eq",'
    . '"value":[' . join(',', @values) . ']'
    . '}';
}

# Joining the operands avoids having to strip a trailing comma
$json .= join(',', @operands);

$json .= ']}}';

close($fh);

print $json;
119