Flatten embedded VCs in vc conversion tool
Change-Id: Idcbad5f8e77d3fe6dc69d439b87ccf355360bd5b
diff --git a/tools/list2vc.pl b/tools/list2vc.pl
index a25bf29..35a205a 100755
--- a/tools/list2vc.pl
+++ b/tools/list2vc.pl
@@ -3,6 +3,7 @@
use strict;
use warnings;
+
# Get or set name of the VC
sub name {
my $self = shift;
@@ -14,6 +15,19 @@
};
+# Comment
+sub comment {
+ my $self = shift;
+ unless (@_) {
+ return $self->{comment};
+ };
+ $self->{comment} //= [];
+
+ push @{$self->{comment}}, shift;
+ return $self;
+};
+
+
# Quote utility function
sub quote {
shift;
@@ -33,15 +47,44 @@
};
+sub _commentparam_to_string {
+ my $self = shift;
+ my $comment = $self->_comment_to_string;
+ if ($comment) {
+ return qq!,"comment":"$comment"!;
+ };
+ return '';
+};
+
+
+sub _comment_to_string {
+ my $self = shift;
+ if (!$self->name && !$self->comment) {
+ return '';
+ };
+
+ my $json = '';
+ $json .= 'name:' . $self->equote($self->name) if $self->name;
+ if ($self->name && $self->comment) {
+ $json .= ','
+ };
+ $json .= join(',', @{$self->{comment}}) if $self->{comment};
+
+ return $json;
+};
+
+
# Stringify globally
sub to_string {
my $self = shift;
## Create collection object
+
my $json = '{';
$json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
- $json .= '"comment":"Name: ' . $self->equote($self->name) . '",' if $self->name;
$json .= '"collection":';
$json .= $self->_to_fragment;
+ # Set at the end, when all comments are done
+ $json .= $self->_commentparam_to_string;
return $json .= '}';
};
@@ -63,6 +106,7 @@
}, $class;
};
+
# Define an operand to be "or"ed
sub with {
my $self = shift;
@@ -91,6 +135,19 @@
push @{$self->{without_fields}->{$field}}, shift;
};
+
+# VC contains only with fields
+sub only_with_fields {
+ my $self = shift;
+
+ if (keys %{$self->{without_fields}} || @{$self->{with}} || @{$self->{without}}) {
+ return 0;
+ };
+
+ return 1;
+};
+
+
# Create a document vector field
sub _doc_vec {
my $field = shift;
@@ -113,7 +170,6 @@
my $json = '{';
$json .= '"@type":"koral:docGroup",';
- $json .= '"comment":"Name: ' . $self->equote($self->name) . '",' if $self->name;
# Make the outer group "and"
if (keys %{$self->{without_fields}}) {
@@ -136,10 +192,29 @@
elsif (keys %{$self->{with_fields}} || @{$self->{with}}) {
$json .= '"operation":"operation:or",';
- # TODO:
- # Flatten embedded or-VCs!
$json .= '"operands":[';
+ # Flatten embedded "or"-VCs
+ foreach my $op (@{$self->{with}}) {
+
+ # The embedded VC has only extending fields
+ if ($op->only_with_fields) {
+
+ $self->comment('embed:[' . $op->_comment_to_string . ']');
+
+ foreach my $k (keys %{$op->{with_fields}}) {
+ foreach my $v (@{$op->{with_fields}->{$k}}) {
+ $self->with_field($k, $v);
+ };
+ };
+ }
+
+ # Embed complex VC
+ else {
+ $json .= $op->_to_fragment . ',';
+ };
+ };
+
foreach my $field (sort keys %{$self->{with_fields}}) {
unless (@{$self->{with_fields}->{$field}}) {
next;
@@ -147,10 +222,6 @@
$json .= _doc_vec($field, $self->{with_fields}->{$field});
};
- foreach my $op (@{$self->{with}}) {
- $json .= $op->_to_fragment . ',';
- };
-
# Remove the last comma
chop $json;
@@ -163,6 +234,8 @@
chop $json;
};
+ # Set at the end, when all comments are done
+ $json .= $self->_commentparam_to_string;
return $json . '}';
};
@@ -209,13 +282,15 @@
exit(0);
};
+# Initial VC group
+my $vc;
# Create an intensional and an extensional VC
my $vc_ext = KorAP::VirtualCorpus::Group->new;
my $vc_int = KorAP::VirtualCorpus::Group->new;
-# Initial VC group
-my $vc = \$vc_ext;
+# Load ext initially
+$$vc = $vc_ext;
# Collect all virtual corpora
my %all_vcs;
@@ -358,6 +433,13 @@
$$vc->with($all_vcs{$value});
}
+ # Add reduction value as a comment
+ elsif ($key eq 'redabs') {
+ # "red. Anz. Texte
+ # absoluter Wert der durch Reduktion zu erzielende Anzahl Texte"
+ $$vc->comment('redabs:' . $value);
+ }
+
# Unknown
else {
# warn $key . ' is an unknown field';
diff --git a/tools/t/list2vc-def.t b/tools/t/list2vc-def.t
index d60ca8d..baaefda 100644
--- a/tools/t/list2vc-def.t
+++ b/tools/t/list2vc-def.t
@@ -50,15 +50,14 @@
is($json->{'collection'}->{'@type'}, 'koral:docGroup', 'type');
is($json->{'collection'}->{'operation'}, 'operation:or', 'operation');
-# is($json->{'collection'}->{'comment'}, 'Name: "VAS-N91 (Stand \"2013\", korr. 2017)"', 'type');
-is($json->{'collection'}->{'comment'}, 'Name: "VAS N91"', 'type');
+is($json->{'collection'}->{'comment'}, 'name:"VAS-N91 (Stand \"2013\", korr. 2017)"', 'type');
$op1 = $json->{'collection'}->{'operands'}->[0];
is($op1->{'@type'}, 'koral:doc', 'type');
is($op1->{'key'}, 'textSigle', 'key');
is($op1->{'match'}, 'match:eq', 'match');
is($op1->{'value'}->[0], "A00/APR/23232", 'value');
-is($op1->{'value'}->[1], ,"A00/APR/23233", 'value');
+is($op1->{'value'}->[1], "A00/APR/23233", 'value');
my $list4 = catfile(dirname(__FILE__), 'data', 'list4.def');
@@ -68,8 +67,15 @@
is($json->{'collection'}->{'@type'}, 'koral:docGroup', 'type');
is($json->{'collection'}->{'operation'}, 'operation:or', 'operation');
-# is($json->{'collection'}->{'comment'}, 'Name: "VAS-N91 (Stand \"2013\", korr. 2017)"', 'type');
-is($json->{'collection'}->{'comment'}, 'Name: "VAS N91"', 'type');
+like($json->{'collection'}->{'comment'}, qr!^name:"VAS N91"!, 'name');
+like($json->{'collection'}->{'comment'}, qr!embed:\[name:"Berliner Zeitung",redabs:143237\]!, 'embed');
+like($json->{'collection'}->{'comment'}, qr!embed:\[name:"Frankfurter Allgemeine",redabs:301166\]!, 'embed');
+$op1 = $json->{'collection'}->{'operands'}->[0];
+is($op1->{'@type'}, 'koral:doc', 'type');
+is($op1->{'key'}, 'corpusSigle', 'key');
+is($op1->{'match'}, 'match:eq', 'match');
+is($op1->{'value'}->[0], "F97", 'value');
+is($op1->{'value'}->[1], "F99", 'value');
done_testing;