Akron | 3f875be | 2020-05-11 14:57:19 +0200 | [diff] [blame] | 1 | #!/usr/bin/env perl |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 2 | package KorAP::VirtualCorpus; |
| 3 | use strict; |
| 4 | use warnings; |
| 5 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 6 | |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 7 | # Get or set name of the VC |
| 8 | sub name { |
| 9 | my $self = shift; |
| 10 | unless (@_) { |
| 11 | return $self->{name}; |
| 12 | }; |
| 13 | $self->{name} = shift; |
| 14 | return $self; |
| 15 | }; |
| 16 | |
| 17 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 18 | # Comment |
| 19 | sub comment { |
| 20 | my $self = shift; |
| 21 | unless (@_) { |
| 22 | return $self->{comment}; |
| 23 | }; |
| 24 | $self->{comment} //= []; |
| 25 | |
| 26 | push @{$self->{comment}}, shift; |
| 27 | return $self; |
| 28 | }; |
| 29 | |
| 30 | |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 31 | # Quote utility function |
| 32 | sub quote { |
| 33 | shift; |
| 34 | my $str = shift; |
| 35 | $str =~ s/(["\\])/\\$1/g; |
| 36 | return qq{"$str"}; |
| 37 | }; |
| 38 | |
| 39 | |
| 40 | # Escaped quote utility function |
| 41 | sub equote { |
| 42 | shift; |
| 43 | my $str = shift; |
| 44 | $str =~ s/(["\\])/\\$1/g; |
| 45 | $str =~ s/(["\\])/\\$1/g; |
| 46 | return '\\"' . $str . '\\"'; |
| 47 | }; |
| 48 | |
| 49 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 50 | sub _commentparam_to_string { |
| 51 | my $self = shift; |
| 52 | my $comment = $self->_comment_to_string; |
| 53 | if ($comment) { |
| 54 | return qq!,"comment":"$comment"!; |
| 55 | }; |
| 56 | return ''; |
| 57 | }; |
| 58 | |
| 59 | |
| 60 | sub _comment_to_string { |
| 61 | my $self = shift; |
| 62 | if (!$self->name && !$self->comment) { |
| 63 | return ''; |
| 64 | }; |
| 65 | |
| 66 | my $json = ''; |
| 67 | $json .= 'name:' . $self->equote($self->name) if $self->name; |
| 68 | if ($self->name && $self->comment) { |
| 69 | $json .= ',' |
| 70 | }; |
| 71 | $json .= join(',', @{$self->{comment}}) if $self->{comment}; |
| 72 | |
| 73 | return $json; |
| 74 | }; |
| 75 | |
| 76 | |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 77 | # Stringify globally |
| 78 | sub to_string { |
| 79 | my $self = shift; |
| 80 | ## Create collection object |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 81 | |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 82 | my $json = '{'; |
| 83 | $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",'; |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 84 | $json .= '"collection":'; |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 85 | $json .= $self->_to_fragment; |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 86 | # Set at the end, when all comments are done |
| 87 | $json .= $self->_commentparam_to_string; |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 88 | return $json .= '}'; |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 89 | }; |
| 90 | |
| 91 | |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 92 | package KorAP::VirtualCorpus::Group; |
| 93 | use strict; |
| 94 | use warnings; |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 95 | use base 'KorAP::VirtualCorpus'; |
| 96 | |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 97 | |
| 98 | # Construct a new VC group |
| 99 | sub new { |
| 100 | my $class = shift; |
| 101 | bless { |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 102 | with => [], |
| 103 | with_fields => {}, |
| 104 | without => [], |
| 105 | without_fields => {}, |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 106 | }, $class; |
| 107 | }; |
| 108 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 109 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 110 | # Define an operand to be "or"ed |
| 111 | sub with { |
| 112 | my $self = shift; |
| 113 | push @{$self->{with}}, shift; |
| 114 | }; |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 115 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 116 | |
| 117 | # Define a field that should be "or"ed |
| 118 | sub with_field { |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 119 | my $self = shift; |
| 120 | my $field = shift; |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 121 | push @{$self->{with_fields}->{$field}}, shift; |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 122 | }; |
| 123 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 124 | # Define an operand to be "and"ed |
| 125 | sub without { |
| 126 | my $self = shift; |
| 127 | push @{$self->{without}}, shift; |
| 128 | }; |
| 129 | |
| 130 | |
| 131 | # Define a field that should be "and"ed |
| 132 | sub without_field { |
| 133 | my $self = shift; |
| 134 | my $field = shift; |
| 135 | push @{$self->{without_fields}->{$field}}, shift; |
| 136 | }; |
| 137 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 138 | |
| 139 | # VC contains only with fields |
| 140 | sub only_with_fields { |
| 141 | my $self = shift; |
| 142 | |
| 143 | if (keys %{$self->{without_fields}} || @{$self->{with}} || @{$self->{without}}) { |
| 144 | return 0; |
| 145 | }; |
| 146 | |
| 147 | return 1; |
| 148 | }; |
| 149 | |
| 150 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 151 | # Create a document vector field |
| 152 | sub _doc_vec { |
| 153 | my $field = shift; |
| 154 | my $vec = shift; |
| 155 | my $json = '{'; |
| 156 | $json .= '"@type":"koral:doc",'; |
| 157 | $json .= '"key":"' . $field . '",'; |
| 158 | $json .= '"match":"match:eq",'; |
| 159 | $json .= '"value":['; |
| 160 | $json .= join ',', map { '"' . $_ . '"' } @$vec; |
| 161 | $json .= ']'; |
| 162 | $json .= '},'; |
| 163 | return $json; |
| 164 | } |
| 165 | |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 166 | |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 167 | # Stringify fragment |
| 168 | sub _to_fragment { |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 169 | my $self = shift; |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 170 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 171 | my $json = '{'; |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 172 | $json .= '"@type":"koral:docGroup",'; |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 173 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 174 | # Make the outer group "and" |
| 175 | if (keys %{$self->{without_fields}}) { |
| 176 | $json .= '"operation":"operation:and",'; |
| 177 | $json .= '"operands":['; |
| 178 | |
| 179 | foreach my $field (sort keys %{$self->{without_fields}}) { |
| 180 | unless (@{$self->{without_fields}->{$field}}) { |
| 181 | next; |
| 182 | }; |
| 183 | $json .= _doc_vec($field, $self->{without_fields}->{$field}); |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 184 | }; |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 185 | |
| 186 | # Remove the last comma |
| 187 | chop $json; |
| 188 | |
| 189 | $json .= ']'; |
| 190 | } |
| 191 | |
| 192 | elsif (keys %{$self->{with_fields}} || @{$self->{with}}) { |
| 193 | $json .= '"operation":"operation:or",'; |
| 194 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 195 | $json .= '"operands":['; |
| 196 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 197 | # Flatten embedded "or"-VCs |
| 198 | foreach my $op (@{$self->{with}}) { |
| 199 | |
| 200 | # The embedded VC has only extending fields |
| 201 | if ($op->only_with_fields) { |
| 202 | |
| 203 | $self->comment('embed:[' . $op->_comment_to_string . ']'); |
| 204 | |
| 205 | foreach my $k (keys %{$op->{with_fields}}) { |
| 206 | foreach my $v (@{$op->{with_fields}->{$k}}) { |
| 207 | $self->with_field($k, $v); |
| 208 | }; |
| 209 | }; |
| 210 | } |
| 211 | |
| 212 | # Embed complex VC |
| 213 | else { |
| 214 | $json .= $op->_to_fragment . ','; |
| 215 | }; |
| 216 | }; |
| 217 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 218 | foreach my $field (sort keys %{$self->{with_fields}}) { |
| 219 | unless (@{$self->{with_fields}->{$field}}) { |
| 220 | next; |
| 221 | }; |
| 222 | $json .= _doc_vec($field, $self->{with_fields}->{$field}); |
| 223 | }; |
| 224 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 225 | # Remove the last comma |
| 226 | chop $json; |
| 227 | |
| 228 | $json .= ']'; |
| 229 | } |
| 230 | |
| 231 | # No operands in the group |
| 232 | else { |
| 233 | # Remove the last comma after the comment |
| 234 | chop $json; |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 235 | }; |
| 236 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 237 | # Set at the end, when all comments are done |
| 238 | $json .= $self->_commentparam_to_string; |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 239 | return $json . '}'; |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 240 | }; |
| 241 | |
| 242 | |
| 243 | package main; |
Akron | 3f875be | 2020-05-11 14:57:19 +0200 | [diff] [blame] | 244 | use strict; |
| 245 | use warnings; |
| 246 | |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 247 | # 2020-05-20 |
| 248 | # Preliminary support for C2 def-files. |
| 249 | |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 250 | our @ARGV; |
| 251 | |
Akron | 3f875be | 2020-05-11 14:57:19 +0200 | [diff] [blame] | 252 | unless (@ARGV) { |
| 253 | print <<'HELP'; |
| 254 | Convert a line-separated list of corpus sigles, doc sigles or |
| 255 | text sigles into a virtual corpus query. |
| 256 | |
| 257 | $ perl list2vc.pl my_vc.txt | gzip -vc > my_vc.jsonld.gz |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 258 | $ cat my_vc.txt | perl list2vc.pl - | gzip -vc > my_vc.jsonld.gz |
Akron | 3f875be | 2020-05-11 14:57:19 +0200 | [diff] [blame] | 259 | |
| 260 | HELP |
| 261 | exit 0; |
| 262 | }; |
| 263 | |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 264 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 265 | # Shorten long strings for logging |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 266 | sub _shorten ($) { |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 267 | my $line = shift; |
| 268 | if (length($line) < 20) { |
| 269 | return $line; |
| 270 | } |
| 271 | else { |
| 272 | return substr($line,0,17) . '...'; |
| 273 | }; |
| 274 | }; |
| 275 | |
| 276 | |
Akron | 3f875be | 2020-05-11 14:57:19 +0200 | [diff] [blame] | 277 | my $fh; |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 278 | if ($ARGV[0] eq '-') { |
| 279 | $fh = *STDIN; |
| 280 | } elsif (!open($fh, '<' . $ARGV[0])) { |
Akron | 3f875be | 2020-05-11 14:57:19 +0200 | [diff] [blame] | 281 | warn $ARGV[0] . " can't be opened"; |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 282 | exit(0); |
Akron | 3f875be | 2020-05-11 14:57:19 +0200 | [diff] [blame] | 283 | }; |
| 284 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 285 | # Initial VC group |
| 286 | my $vc; |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 287 | |
Akron | 323881c | 2020-05-20 17:15:42 +0200 | [diff] [blame] | 288 | # Create an intensional and an extensional VC |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 289 | my $vc_ext = KorAP::VirtualCorpus::Group->new; |
| 290 | my $vc_int = KorAP::VirtualCorpus::Group->new; |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 291 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 292 | # Load ext initially |
| 293 | $$vc = $vc_ext; |
Akron | 323881c | 2020-05-20 17:15:42 +0200 | [diff] [blame] | 294 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 295 | # Collect all virtual corpora |
| 296 | my %all_vcs; |
| 297 | |
Akron | 323881c | 2020-05-20 17:15:42 +0200 | [diff] [blame] | 298 | my $frozen = 0; |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 299 | |
| 300 | # Iterate over the whole list |
| 301 | while (!eof $fh) { |
| 302 | my $line = readline($fh); |
| 303 | chomp $line; |
| 304 | |
Akron | e2645ec | 2020-05-20 12:37:25 +0200 | [diff] [blame] | 305 | |
| 306 | # Skip empty lines |
| 307 | if (!$line || length($line) == 0 || $line =~ /^[\s\t\n]*$/) { |
| 308 | # empty |
| 309 | next; |
| 310 | }; |
| 311 | |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 312 | my ($key, $value, $desc); |
| 313 | |
| 314 | # Line-Type: <e>c</a> |
| 315 | if ($line =~ /^\s*<([^>]+)>\s*([^<]*)\s*<\/\1>\s*$/) { |
| 316 | $key = $1; |
| 317 | $value = $2 // undef; |
| 318 | } |
| 319 | |
| 320 | # Line-Type: <e>c |
| 321 | elsif($line =~ /^\s*<([^>]+)>\s*([^<]+)\s*$/) { |
| 322 | $key = $1; |
| 323 | $value = $2; |
| 324 | } |
| 325 | |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 326 | # Get text sigles |
Akron | 323881c | 2020-05-20 17:15:42 +0200 | [diff] [blame] | 327 | elsif ($line =~ m!^(?:\w+\/){2}\w+$!) { |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 328 | $key = 'text'; |
| 329 | $value = $line; |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 330 | } |
| 331 | |
| 332 | # Get doc sigles |
Akron | 323881c | 2020-05-20 17:15:42 +0200 | [diff] [blame] | 333 | elsif ($line =~ m!^(\w+\/\w+?)(?:\s.+?)?$!) { |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 334 | $key = 'doc'; |
| 335 | $value = $1; |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 336 | } |
| 337 | |
| 338 | # Get corpus sigles |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 339 | elsif ($line !~ m!(?:\/|\s)!) { |
| 340 | $key = 'corpus'; |
| 341 | $value = $line; |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 342 | } |
| 343 | |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 344 | # Not known |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 345 | else { |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 346 | warn _shorten($line) . q! isn't a valid VC definition!; |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 347 | next; |
| 348 | }; |
| 349 | |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 350 | # Add text field |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 351 | if ($key eq 'text') { |
Akron | 68746a1 | 2020-05-20 15:19:55 +0200 | [diff] [blame] | 352 | |
| 353 | # Convert C2 sigle to KorAP form |
| 354 | $value =~ s!^([^/]+?/[^\.]+?)\.(.+?)$!$1\/$2!; |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 355 | ${$vc}->with_field(textSigle => $value); |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 356 | } |
| 357 | |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 358 | # Add doc field |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 359 | elsif ($key eq 'doc') { |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 360 | ${$vc}->with_field(docSigle => $value); |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 361 | } |
| 362 | |
Akron | 49c765f | 2020-05-20 16:41:22 +0200 | [diff] [blame] | 363 | # Add corpus field |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 364 | elsif ($key eq 'corpus') { |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 365 | ${$vc}->with_field(corpusSigle => $value); |
| 366 | } |
| 367 | |
| 368 | # Add corpus field |
| 369 | elsif ($key eq 'cn') { |
| 370 | # Korpussigle, z.B. 'F97 Frankfurter Allgemeine 1997' |
| 371 | if ($value =~ m!^([^\/\s]+)(?:\s.+?)?$!) { |
| 372 | ${$vc}->with_field(corpusSigle => $1); |
| 373 | }; |
Akron | 323881c | 2020-05-20 17:15:42 +0200 | [diff] [blame] | 374 | } |
| 375 | |
| 376 | # Mark the vc as frozen |
| 377 | # This means that an extended VC area is expected |
| 378 | elsif ($key eq 'frozen') { |
| 379 | $frozen = 1; |
| 380 | } |
| 381 | |
| 382 | # Start/End intended VC area |
| 383 | elsif ($key eq 'intended') { |
| 384 | if ($value eq 'start') { |
| 385 | $$vc = $vc_int; |
| 386 | } |
| 387 | elsif ($value ne 'end') { |
| 388 | warn 'Unknown intension value ' . $value; |
| 389 | }; |
| 390 | } |
| 391 | |
| 392 | # Start/End extended VC area |
| 393 | elsif ($key eq 'extended') { |
| 394 | if ($value eq 'start') { |
| 395 | $$vc = $vc_ext; |
| 396 | } |
| 397 | elsif ($value ne 'end') { |
| 398 | warn 'Unknown extension value ' . $value; |
| 399 | }; |
| 400 | } |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 401 | |
| 402 | # Set VC name |
| 403 | elsif ($key eq 'name') { |
| 404 | # "Name des virt. Korpus, der angezeigt wird. |
| 405 | # Wird auch intern zur Korpusbildung referenziert, z.B. für <and>, |
| 406 | # <add>, <sub>" |
| 407 | |
| 408 | # No global name defined yet |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 409 | if ($$vc && !$$vc->name) { |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 410 | $vc_ext->name($value); |
| 411 | $vc_int->name($value); |
| 412 | next; |
| 413 | }; |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 414 | |
| 415 | ${$vc} = KorAP::VirtualCorpus::Group->new; |
| 416 | ${$vc}->name($value); |
| 417 | } |
| 418 | |
| 419 | # End VC def |
| 420 | elsif ($key eq 'end') { |
| 421 | $all_vcs{${$vc}->name} = $$vc; |
| 422 | # $vc = undef; |
| 423 | } |
| 424 | |
| 425 | # Add VC definition |
| 426 | elsif ($key eq 'add') { |
| 427 | unless (defined $all_vcs{$value}) { |
| 428 | # warn 'VC ' . $value . ' not defined'; |
| 429 | # exit(1); |
| 430 | next; |
| 431 | }; |
| 432 | |
| 433 | $$vc->with($all_vcs{$value}); |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 434 | } |
| 435 | |
Akron | 286b46e | 2020-05-25 17:07:48 +0200 | [diff] [blame^] | 436 | # Add reduction value as a comment |
| 437 | elsif ($key eq 'redabs') { |
| 438 | # "red. Anz. Texte |
| 439 | # absoluter Wert der durch Reduktion zu erzielende Anzahl Texte" |
| 440 | $$vc->comment('redabs:' . $value); |
| 441 | } |
| 442 | |
Akron | 3587f36 | 2020-05-20 17:50:38 +0200 | [diff] [blame] | 443 | # Unknown |
| 444 | else { |
| 445 | # warn $key . ' is an unknown field'; |
| 446 | }; |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 447 | }; |
| 448 | |
Akron | 26b5970 | 2020-05-19 12:14:41 +0200 | [diff] [blame] | 449 | close($fh); |
| 450 | |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 451 | # Stringify current (extended?) virtual corpus |
Akron | 323881c | 2020-05-20 17:15:42 +0200 | [diff] [blame] | 452 | print $$vc->to_string; |