Introduce object mechanism to VCs in vc conversion tool
Change-Id: If5d06a0b9b08b9c39700a0eb471a0edfb0d5daf5
diff --git a/core/Changes b/core/Changes
index 12117b3..51e40ed 100644
--- a/core/Changes
+++ b/core/Changes
@@ -5,6 +5,9 @@
- Added welcome page.
11/05/2020
- Added tool to create VC from list (diewald)
+29/05/2020
+ - Improved conversion tool to deal with Cosmas-II VC definitions
+ (diewald)
# version 0.62.3
03/12/2019
diff --git a/tools/lib/KorAP/VirtualCorpus.pm b/tools/lib/KorAP/VirtualCorpus.pm
new file mode 100644
index 0000000..58adc6b
--- /dev/null
+++ b/tools/lib/KorAP/VirtualCorpus.pm
@@ -0,0 +1,104 @@
+package KorAP::VirtualCorpus;
+use strict;
+use warnings;
+
+
+# Get or set name of the VC
+sub name {
+ my $self = shift;
+ unless (@_) {
+ return $self->{name};
+ };
+ $self->{name} = shift;
+ return $self;
+};
+
+
+# Comment
+sub comment {
+ my $self = shift;
+ unless (@_) {
+ return $self->{comment};
+ };
+ $self->{comment} //= [];
+
+ push @{$self->{comment}}, @_;
+ return $self;
+};
+
+
+# Flatten the object - can be overwritten
+sub flatten {
+ shift;
+};
+
+
+# Serialize to koral object - can be overwritten
+sub to_koral {
+ shift;
+};
+
+
+# Quote utility function
+sub quote {
+ shift;
+ my $str = shift;
+ $str =~ s/(["\\])/\\$1/g;
+ return qq{"$str"};
+};
+
+
+# Escaped quote utility function
+sub equote {
+ shift;
+ my $str = shift;
+ $str =~ s/(["\\])/\\$1/g;
+ $str =~ s/(["\\])/\\$1/g;
+ return '\\"' . $str . '\\"';
+};
+
+
+sub _commentparam_to_string {
+ my $self = shift;
+ my $comment = $self->_comment_to_string;
+ if ($comment) {
+ return qq!,"comment":"$comment"!;
+ };
+ return '';
+};
+
+
+sub _comment_to_string {
+ my $self = shift;
+ if (!$self->name && !$self->comment) {
+ return '';
+ };
+
+ my $json = '';
+ $json .= 'name:' . $self->equote($self->name) if $self->name;
+ if ($self->name && $self->comment) {
+ $json .= ','
+ };
+ $json .= join(',', @{$self->{comment}}) if $self->{comment};
+
+ return $json;
+};
+
+
+# Stringify collection
+sub to_string {
+ my $self = shift;
+ ## Create collection object
+
+ my $obj = $self->to_koral;
+
+ my $json = '{';
+ $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
+ $json .= '"collection":';
+ $json .= $obj->_to_fragment;
+ # Set at the end, when all comments are done
+ $json .= $obj->_commentparam_to_string;
+ return $json .= '}';
+};
+
+1;
diff --git a/tools/lib/KorAP/VirtualCorpus/And.pm b/tools/lib/KorAP/VirtualCorpus/And.pm
new file mode 100644
index 0000000..613710b
--- /dev/null
+++ b/tools/lib/KorAP/VirtualCorpus/And.pm
@@ -0,0 +1,69 @@
+package KorAP::VirtualCorpus::And;
+use strict;
+use warnings;
+use base 'KorAP::VirtualCorpus';
+
+
+# TODO:
+# Support comments!
+
+
+# Constructor
+sub new {
+ my $class = shift;
+ bless { ops => [@_] }, $class;
+};
+
+
+# Get koral type
+sub koral_type {
+ return 'And';
+};
+
+
+# Get operands
+sub operands {
+ shift->{ops};
+};
+
+
+# Flatten group
+sub flatten {
+ my $self = shift;
+
+ my @ops;
+
+ foreach (@{$self->{ops}}) {
+ if ($_->koral_type eq 'And') {
+ push @ops, @{$_->operands};
+ }
+
+ else {
+ push @ops, $_->flatten;
+ };
+ };
+
+ $self->{ops} = \@ops;
+
+ return $self;
+};
+
+
+# Serialize fragment
+sub _to_fragment {
+ my $self = shift;
+ my $json = '{';
+ $json .= '"@type":"koral:docGroup",';
+ $json .= '"operation":"operation:and",';
+ $json .= '"operands":[';
+ $json .= join(',', map { $_->_to_fragment } @{$self->{ops}});
+ $json .= ']';
+
+ # Set at the end, when all comments are done
+ $json .= $self->_commentparam_to_string;
+ $json .= '}';
+ return $json;
+};
+
+
+1;
diff --git a/tools/lib/KorAP/VirtualCorpus/Doc.pm b/tools/lib/KorAP/VirtualCorpus/Doc.pm
new file mode 100644
index 0000000..e635d3d
--- /dev/null
+++ b/tools/lib/KorAP/VirtualCorpus/Doc.pm
@@ -0,0 +1,96 @@
+package KorAP::VirtualCorpus::Doc;
+use strict;
+use warnings;
+use base 'KorAP::VirtualCorpus';
+
+# Constructor
+sub new {
+ my $class = shift;
+ bless {
+ key => shift,
+ match => 'eq',
+ type => 'string',
+ value => ''
+ }, $class;
+};
+
+
+# Clone document VC
+sub clone {
+ my $self = shift;
+ bless {
+ key => $self->{key},
+ match => $self->{match},
+ type => $self->{type},
+ value => $self->{value},
+ }, __PACKAGE__;
+};
+
+
+# Return object type
+sub koral_type {
+ return 'Doc';
+};
+
+
+# Get or set type
+sub type {
+ my $self = shift;
+ if (@_) {
+ $self->{type} = shift;
+ return $self;
+ };
+ return $self->{type};
+};
+
+
+# Get or set match
+sub match {
+ my $self = shift;
+ if (@_) {
+ $self->{match} = shift;
+ return $self;
+ };
+ return $self->{match};
+};
+
+
+# Get or set key
+sub key {
+ my $self = shift;
+ if (@_) {
+ $self->{key} = shift;
+ return $self;
+ };
+ return $self->{key};
+};
+
+
+# Get or set value
+sub value {
+ my $self = shift;
+ if (@_) {
+ $self->{value} = shift;
+ return $self;
+ };
+ return $self->{value};
+};
+
+
+# Stringify fragment
+sub _to_fragment {
+ my $self = shift;
+ my $json = '{';
+ $json .= '"@type":"koral:doc",';
+ $json .= '"type":"type:' . $self->type . '",';
+ $json .= '"match":"match:' . $self->match . '",';
+ $json .= '"key":"' . $self->key . '",';
+ $json .= '"value":' . $self->quote($self->value);
+
+ # Set at the end, when all comments are done
+ $json .= $self->_commentparam_to_string;
+ return $json . '}';
+};
+
+
+1;
diff --git a/tools/lib/KorAP/VirtualCorpus/DocVec.pm b/tools/lib/KorAP/VirtualCorpus/DocVec.pm
new file mode 100644
index 0000000..49fefb8
--- /dev/null
+++ b/tools/lib/KorAP/VirtualCorpus/DocVec.pm
@@ -0,0 +1,48 @@
+package KorAP::VirtualCorpus::DocVec;
+use strict;
+use warnings;
+use base 'KorAP::VirtualCorpus::Doc';
+
+# Constructor
+sub new {
+ my $class = shift;
+ bless {
+ key => shift,
+ match => 'eq',
+ type => 'string',
+ value => []
+ }, $class;
+};
+
+# Return object type
+sub koral_type {
+ return 'DocVec';
+};
+
+# Get or set value
+sub value {
+ my $self = shift;
+ if (@_) {
+ $self->{value} = [@_];
+ return $self;
+ };
+ return $self->{value};
+};
+
+# Stringify fragment
+sub _to_fragment {
+ my $self = shift;
+ my $json = '{';
+ $json .= '"@type":"koral:doc",';
+ $json .= '"type":"type:' . $self->type . '",';
+ $json .= '"match":"match:' . $self->match . '",';
+ $json .= '"key":"' . $self->key . '",';
+ $json .= '"value":[' . join(',', map { $self->quote($_) } @{$self->value}) . ']';
+
+ # Set at the end, when all comments are done
+ $json .= $self->_commentparam_to_string;
+ return $json . '}';
+};
+
+
+1;
diff --git a/tools/lib/KorAP/VirtualCorpus/Group.pm b/tools/lib/KorAP/VirtualCorpus/Group.pm
new file mode 100644
index 0000000..4062cf9
--- /dev/null
+++ b/tools/lib/KorAP/VirtualCorpus/Group.pm
@@ -0,0 +1,159 @@
+package KorAP::VirtualCorpus::Group;
+use strict;
+use warnings;
+use base 'KorAP::VirtualCorpus';
+use KorAP::VirtualCorpus::Doc;
+use KorAP::VirtualCorpus::And;
+use KorAP::VirtualCorpus::Or;
+
+# Abstract KoralQuery object that normalize to And or Or.
+
+# Construct a new VC group
+sub new {
+ my $class = shift;
+ bless {
+ # New try
+ ops => undef,
+ type => undef
+ }, $class;
+};
+
+
+# Clone object
+sub clone {
+ my $self = shift;
+ my $clone = {};
+ $clone->{ops} = [@{$self->{ops}}] if $self->{ops};
+ $clone->{type} = $self->{type};
+ $clone->{name} = $self->{name};
+ bless $clone, __PACKAGE__;
+};
+
+
+# Add operand
+sub add {
+ my ($self, $type, $op) = @_;
+
+ if (!$self->{ops}) {
+ push @{$self->{ops}}, $op;
+ return $self;
+ };
+
+ if (!$self->{type}) {
+ push @{$self->{ops}}, $op;
+ $self->{type} = $type;
+ return $self;
+ };
+
+ if ($self->{type} eq $type) {
+ push @{$self->{ops}}, $op;
+ return $self;
+ };
+
+ if ($self->{type} eq 'union') {
+ my $vc = KorAP::VirtualCorpus::Or->new(
+ @{$self->{ops}}
+ );
+
+ $self->{type} = 'joint';
+ $self->{ops} = [$vc, $op];
+ return $self;
+ };
+
+ my $vc = KorAP::VirtualCorpus::And->new(
+ @{$self->{ops}}
+ );
+
+ $self->{type} = 'union';
+ $self->{ops} = [$vc, $op];
+ return $self;
+};
+
+
+# Serialize to koral
+sub to_koral {
+ my $self = shift;
+
+ # Single object
+ if (@{$self->{ops}} == 1) {
+ return $self->{ops}->[0]->name($self->name)->flatten;
+ };
+
+ # Union group
+ if ($self->{type} eq 'union') {
+ return KorAP::VirtualCorpus::Or->new(
+ @{$self->{ops}}
+ )->name($self->name)->flatten;
+ }
+
+ # Joint group
+ elsif ($self->{type} eq 'joint') {
+ return KorAP::VirtualCorpus::And->new(
+ @{$self->{ops}}
+ )->name($self->name)->flatten;
+ };
+};
+
+
+# Define an operand to be "or"ed
+sub union {
+ my $self = shift;
+ $self->add('union', shift)
+};
+
+
+# Define a field that should be "or"ed
+sub union_field {
+ my $self = shift;
+ my $field = shift;
+ my $value = shift;
+ $self->union(
+ KorAP::VirtualCorpus::Doc->new($field)->value($value)
+ );
+};
+
+# Define an operand to be "and"ed
+sub joint {
+ my $self = shift;
+ $self->add('joint', shift)
+};
+
+
+# Define a field that should be "and"ed
+sub joint_field {
+ my $self = shift;
+ my $field = shift;
+ my $value = shift;
+ $self->joint(
+ KorAP::VirtualCorpus::Doc->new($field)->value($value)
+ );
+};
+
+
+# Restrict VC by date
+sub from {
+ my $self = shift;
+ my ($year, $month) = @_;
+ my $doc = KorAP::VirtualCorpus::Doc->new('createDate')
+ ->type('date')
+ ->match('geq')
+ ->value($year . '-' . $month);
+ $self->joint($doc);
+};
+
+
+# Restrict VC by date
+sub to {
+ my $self = shift;
+ my ($year, $month) = @_;
+ my $doc = KorAP::VirtualCorpus::Doc->new('createDate')
+ ->type('date')
+ ->match('leq')
+ ->value($year . '-' . $month);
+ $self->joint($doc);
+};
+
+
+1;
+
+__END__
diff --git a/tools/lib/KorAP/VirtualCorpus/Or.pm b/tools/lib/KorAP/VirtualCorpus/Or.pm
new file mode 100644
index 0000000..a795ed3
--- /dev/null
+++ b/tools/lib/KorAP/VirtualCorpus/Or.pm
@@ -0,0 +1,86 @@
+package KorAP::VirtualCorpus::Or;
+use strict;
+use warnings;
+use KorAP::VirtualCorpus::DocVec;
+use base 'KorAP::VirtualCorpus';
+
+
+# TODO:
+# Support comments!
+
+
+# Constructor
+sub new {
+ my $class = shift;
+ bless { ops => [@_] }, $class;
+};
+
+
+# Get koral type
+sub koral_type {
+ return 'Or';
+};
+
+
+# Get operands
+sub operands {
+ shift->{ops};
+};
+
+
+# Flatten the group
+sub flatten {
+ my $self = shift;
+ my %fields = ();
+ my @ops;
+
+ foreach (@{$self->{ops}}) {
+ if ($_->koral_type eq 'Doc') {
+ if ($_->type eq 'string' && $_->match eq 'eq') {
+ $fields{$_->key} //= [];
+ push @{$fields{$_->key}}, $_->value;
+ };
+ }
+ elsif ($_->koral_type eq 'DocVec') {
+ $fields{$_->key} //= [];
+ push @{$fields{$_->key}}, @{$_->value};
+ }
+ elsif ($_->koral_type eq 'Or') {
+ push @ops, @{$_->operands}
+ }
+ else {
+ push @ops, $_->flatten;
+ }
+ };
+
+ # Vectorize fields
+ foreach (sort keys %fields) {
+ push @ops, KorAP::VirtualCorpus::DocVec->new($_)->value(@{$fields{$_}});
+ };
+
+ if (@ops == 1) {
+ return $ops[0]->name($self->name);
+ };
+
+ $self->{ops} = \@ops;
+ return $self;
+};
+
+
+# Serialize to fragment
+sub _to_fragment {
+ my $self = shift;
+ my $json = '{';
+ $json .= '"@type":"koral:docGroup",';
+ $json .= '"operation":"operation:or",';
+ $json .= '"operands":[';
+ $json .= join(',', map { $_->_to_fragment } @{$self->{ops}});
+ $json .= ']';
+ # Set at the end, when all comments are done
+ $json .= $self->_commentparam_to_string;
+ $json .= '}';
+ return $json;
+};
+
+
+1;
diff --git a/tools/list2vc.pl b/tools/list2vc.pl
index 35a205a..3e31e50 100755
--- a/tools/list2vc.pl
+++ b/tools/list2vc.pl
@@ -1,251 +1,13 @@
#!/usr/bin/env perl
-package KorAP::VirtualCorpus;
use strict;
use warnings;
-
-
-# Get or set name of the VC
-sub name {
- my $self = shift;
- unless (@_) {
- return $self->{name};
- };
- $self->{name} = shift;
- return $self;
-};
-
-
-# Comment
-sub comment {
- my $self = shift;
- unless (@_) {
- return $self->{comment};
- };
- $self->{comment} //= [];
-
- push @{$self->{comment}}, shift;
- return $self;
-};
-
-
-# Quote utility function
-sub quote {
- shift;
- my $str = shift;
- $str =~ s/(["\\])/\\$1/g;
- return qq{"$str"};
-};
-
-
-# Escaped quote utility function
-sub equote {
- shift;
- my $str = shift;
- $str =~ s/(["\\])/\\$1/g;
- $str =~ s/(["\\])/\\$1/g;
- return '\\"' . $str . '\\"';
-};
-
-
-sub _commentparam_to_string {
- my $self = shift;
- my $comment = $self->_comment_to_string;
- if ($comment) {
- return qq!,"comment":"$comment"!;
- };
- return '';
-};
-
-
-sub _comment_to_string {
- my $self = shift;
- if (!$self->name && !$self->comment) {
- return '';
- };
-
- my $json = '';
- $json .= 'name:' . $self->equote($self->name) if $self->name;
- if ($self->name && $self->comment) {
- $json .= ','
- };
- $json .= join(',', @{$self->{comment}}) if $self->{comment};
-
- return $json;
-};
-
-
-# Stringify globally
-sub to_string {
- my $self = shift;
- ## Create collection object
-
- my $json = '{';
- $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
- $json .= '"collection":';
- $json .= $self->_to_fragment;
- # Set at the end, when all comments are done
- $json .= $self->_commentparam_to_string;
- return $json .= '}';
-};
-
-
-package KorAP::VirtualCorpus::Group;
-use strict;
-use warnings;
-use base 'KorAP::VirtualCorpus';
-
-
-# Construct a new VC group
-sub new {
- my $class = shift;
- bless {
- with => [],
- with_fields => {},
- without => [],
- without_fields => {},
- }, $class;
-};
-
-
-# Define an operand to be "or"ed
-sub with {
- my $self = shift;
- push @{$self->{with}}, shift;
-};
-
-
-# Define a field that should be "or"ed
-sub with_field {
- my $self = shift;
- my $field = shift;
- push @{$self->{with_fields}->{$field}}, shift;
-};
-
-# Define an operand to be "and"ed
-sub without {
- my $self = shift;
- push @{$self->{without}}, shift;
-};
-
-
-# Define a field that should be "and"ed
-sub without_field {
- my $self = shift;
- my $field = shift;
- push @{$self->{without_fields}->{$field}}, shift;
-};
-
-
-# VC contains only with fields
-sub only_with_fields {
- my $self = shift;
-
- if (keys %{$self->{without_fields}} || @{$self->{with}} || @{$self->{without}}) {
- return 0;
- };
-
- return 1;
-};
-
-
-# Create a document vector field
-sub _doc_vec {
- my $field = shift;
- my $vec = shift;
- my $json = '{';
- $json .= '"@type":"koral:doc",';
- $json .= '"key":"' . $field . '",';
- $json .= '"match":"match:eq",';
- $json .= '"value":[';
- $json .= join ',', map { '"' . $_ . '"' } @$vec;
- $json .= ']';
- $json .= '},';
- return $json;
-}
-
-
-# Stringify fragment
-sub _to_fragment {
- my $self = shift;
-
- my $json = '{';
- $json .= '"@type":"koral:docGroup",';
-
- # Make the outer group "and"
- if (keys %{$self->{without_fields}}) {
- $json .= '"operation":"operation:and",';
- $json .= '"operands":[';
-
- foreach my $field (sort keys %{$self->{without_fields}}) {
- unless (@{$self->{without_fields}->{$field}}) {
- next;
- };
- $json .= _doc_vec($field, $self->{without_fields}->{$field});
- };
-
- # Remove the last comma
- chop $json;
-
- $json .= ']';
- }
-
- elsif (keys %{$self->{with_fields}} || @{$self->{with}}) {
- $json .= '"operation":"operation:or",';
-
- $json .= '"operands":[';
-
- # Flatten embedded "or"-VCs
- foreach my $op (@{$self->{with}}) {
-
- # The embedded VC has only extending fields
- if ($op->only_with_fields) {
-
- $self->comment('embed:[' . $op->_comment_to_string . ']');
-
- foreach my $k (keys %{$op->{with_fields}}) {
- foreach my $v (@{$op->{with_fields}->{$k}}) {
- $self->with_field($k, $v);
- };
- };
- }
-
- # Embed complex VC
- else {
- $json .= $op->_to_fragment . ',';
- };
- };
-
- foreach my $field (sort keys %{$self->{with_fields}}) {
- unless (@{$self->{with_fields}->{$field}}) {
- next;
- };
- $json .= _doc_vec($field, $self->{with_fields}->{$field});
- };
-
- # Remove the last comma
- chop $json;
-
- $json .= ']';
- }
-
- # No operands in the group
- else {
- # Remove the last comma after the comment
- chop $json;
- };
-
- # Set at the end, when all comments are done
- $json .= $self->_commentparam_to_string;
- return $json . '}';
-};
-
-
-package main;
-use strict;
-use warnings;
+use lib 'lib';
+use KorAP::VirtualCorpus::Group;
# 2020-05-20
# Preliminary support for C2 def-files.
+# 2020-05-29
+# Introduce optimizable object system.
our @ARGV;
@@ -352,24 +114,24 @@
# Convert C2 sigle to KorAP form
$value =~ s!^([^/]+?/[^\.]+?)\.(.+?)$!$1\/$2!;
- ${$vc}->with_field(textSigle => $value);
+ ${$vc}->union_field(textSigle => $value);
}
# Add doc field
elsif ($key eq 'doc') {
- ${$vc}->with_field(docSigle => $value);
+ ${$vc}->union_field(docSigle => $value);
}
# Add corpus field
elsif ($key eq 'corpus') {
- ${$vc}->with_field(corpusSigle => $value);
+ ${$vc}->union_field(corpusSigle => $value);
}
# Add corpus field
elsif ($key eq 'cn') {
# Korpussigle, z.B. 'F97 Frankfurter Allgemeine 1997'
if ($value =~ m!^([^\/\s]+)(?:\s.+?)?$!) {
- ${$vc}->with_field(corpusSigle => $1);
+ ${$vc}->union_field(corpusSigle => $1);
};
}
@@ -430,19 +192,108 @@
next;
};
- $$vc->with($all_vcs{$value});
+ $$vc->union($all_vcs{$value}->clone->to_koral);
}
+ # AND definition
+ elsif ($key eq 'and') {
+ unless (defined $all_vcs{$value}) {
+ # warn 'VC ' . $value . ' not defined';
+ # exit(1);
+ next;
+ };
+
+ $$vc->joint($all_vcs{$value}->clone->to_koral);
+ }
+
+ # Source of the corpus
+ elsif ($key eq 'ql') {
+ # Quellenname, z.B. "Neue Zürcher Zeitung"
+ $$vc->union_field(corpusTitle => $value);
+ }
+
+ elsif ($key eq 'sub') {
+ # "Sub" is the difference - it is the "and not" operation.
+ warn $key . ' is not yet supported';
+ }
+
+ elsif ($key eq 'co') {
+ # Country, z.B. DE für Text in Deutschland erschienen
+ warn $key . ' is not yet supported';
+ }
+
+ elsif ($key eq 'tl') {
+ # Textlength, Bereich von Texten der angegebenen Länge [in Anz. Wörtern]
+ warn $key . ' is not yet supported';
+ }
+
+ elsif ($key eq 'ts') {
+ # Textsorte, z.B. "Bericht"
+ warn $key . ' is not yet supported';
+ }
+
+ elsif ($key eq 'th') {
+ # Thema, z.B. "Sport - Fußball"
+ warn $key . ' is not yet supported';
+ }
+
+ elsif ($key eq 'red') {
+ # Reduktionsfaktor
+ # Wert zw. 1-99%: virt. Korpus wird auf diesen Wert
+ # reduziert. Modus: feste Reduzierung, nicht variabel.
+ warn $key . ' is not yet supported';
+ }
+
+ elsif ($key eq 'thprob') {
+ # ThemaProbability
+ # Wert, der für <th>Thema verwendet wird um zu bestimmen, ab welchem
+ # Zuverläßigkeitswert ein Thema übernommen wird
+ }
+
+
# Add reduction value as a comment
elsif ($key eq 'redabs') {
# "red. Anz. Texte
# absoluter Wert der durch Reduktion zu erzielende Anzahl Texte"
$$vc->comment('redabs:' . $value);
+ warn $key . ' is not yet supported';
+ }
+
+ # Add reduction value as a comment
+ elsif ($key eq 'date') {
+ # Supports two pattern schemes:
+ # m1=Year1/Month1 bis Year2/Month2
+ # Datumsbereich Schema 1: z.B. "2000/01 bis 2010/12"
+
+ # Schema 1
+ if ($value =~ m!^(?:m1\s*=\s*)?\s*(\d+)\/(\d+) bis (\d+)\/(\d+)\s*$!s) {
+ my ($y1, $m1, $y2, $m2) = ($1, $2, $3, $4);
+ if ($m1 < 10) {
+ $m1 = '0' . (0+$m1);
+ };
+ if ($m2 < 10) {
+ $m2 = '0' . (0+$m2);
+ };
+ $$vc->from($y1, $m1);
+ $$vc->to($y2, $m2);
+ }
+
+ # Scheme 2
+ elsif ($value =~ m!^\s*\d{4}-\d{4}\s+und\s+\d{1,2}-\d{1,2}\s*$!) {
+ # m2=Year1-Year2 und Month1-Month2
+ # Datumsbereich Schema 2: z.B. "1990-2000 und 06-06"
+
+ warn 'Second date scheme not yet supported!'
+ }
+
+ else {
+ warn 'Unknown date scheme ' . $value;
+ };
}
# Unknown
else {
- # warn $key . ' is an unknown field';
+ warn $key . ' is an unknown field';
};
};
diff --git a/tools/t/list2vc-def.t b/tools/t/list2vc-def.t
index baaefda..a3b60e7 100644
--- a/tools/t/list2vc-def.t
+++ b/tools/t/list2vc-def.t
@@ -44,38 +44,41 @@
my $list3 = catfile(dirname(__FILE__), 'data', 'list3.def');
+
# Check JSON
# Only return extended area
$json = decode_json(join('', `$script $list3`));
-is($json->{'collection'}->{'@type'}, 'koral:docGroup', 'type');
-is($json->{'collection'}->{'operation'}, 'operation:or', 'operation');
+is($json->{'collection'}->{'@type'}, 'koral:doc', 'type');
+
+
is($json->{'collection'}->{'comment'}, 'name:"VAS-N91 (Stand \"2013\", korr. 2017)"', 'type');
-$op1 = $json->{'collection'}->{'operands'}->[0];
+$op1 = $json->{'collection'};
is($op1->{'@type'}, 'koral:doc', 'type');
is($op1->{'key'}, 'textSigle', 'key');
is($op1->{'match'}, 'match:eq', 'match');
is($op1->{'value'}->[0], "A00/APR/23232", 'value');
is($op1->{'value'}->[1], "A00/APR/23233", 'value');
-
my $list4 = catfile(dirname(__FILE__), 'data', 'list4.def');
# Only contains intended area
$json = decode_json(join('', `$script $list4`));
is($json->{'collection'}->{'@type'}, 'koral:docGroup', 'type');
-is($json->{'collection'}->{'operation'}, 'operation:or', 'operation');
+is($json->{'collection'}->{'comment'}, 'name:"VAS N91"', 'name');
like($json->{'collection'}->{'comment'}, qr!^name:"VAS N91"!, 'name');
-like($json->{'collection'}->{'comment'}, qr!embed:\[name:"Berliner Zeitung",redabs:143237\]!, 'embed');
-like($json->{'collection'}->{'comment'}, qr!embed:\[name:"Frankfurter Allgemeine",redabs:301166\]!, 'embed');
-$op1 = $json->{'collection'}->{'operands'}->[0];
-is($op1->{'@type'}, 'koral:doc', 'type');
-is($op1->{'key'}, 'corpusSigle', 'key');
-is($op1->{'match'}, 'match:eq', 'match');
-is($op1->{'value'}->[0], "F97", 'value');
-is($op1->{'value'}->[1], "F99", 'value');
+
+my $bz = $json->{'collection'}->{operands}->[0]->{operands}->[0];
+is($bz->{operation}, 'operation:and', 'Intersection');
+is(scalar @{$bz->{operands}}, 3, 'Flatten operands');
+
+my $faz = $json->{'collection'}->{operands}->[0]->{operands}->[1];
+is($faz->{'@type'}, 'koral:doc', 'DocVec');
+is($faz->{value}->[0], 'F97', 'Value');
+is($faz->{value}->[1], 'F99', 'Value');
done_testing;
+__END__
diff --git a/tools/t/list2vc-obj.t b/tools/t/list2vc-obj.t
new file mode 100644
index 0000000..d4bbd80
--- /dev/null
+++ b/tools/t/list2vc-obj.t
@@ -0,0 +1,23 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Test::More;
+use Data::Dumper;
+use Mojo::JSON 'decode_json';
+
+require_ok 'KorAP::VirtualCorpus::Group';
+
+my $vc = KorAP::VirtualCorpus::Group->new;
+$vc->union_field('author', 'Goethe');
+$vc->union_field('author', 'Schiller');
+$vc->joint_field('author', 'Fontane');
+
+
+my $json = decode_json $vc->to_koral->to_string;
+
+is($json->{collection}->{operation}, 'operation:and');
+is($json->{collection}->{operands}->[0]->{'@type'}, 'koral:doc');
+is($json->{collection}->{operands}->[0]->{'value'}->[0], 'Goethe');
+is($json->{collection}->{operands}->[0]->{'value'}->[1], 'Schiller');
+
+done_testing;