Removed deprecated 'primary' flag
Change-Id: I7cfa0dbd38fe40edebfce2ee42b1743cacf54c5c
diff --git a/Changes b/Changes
index 2e14f56..beb5043 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.41 2020-08-06
+0.41 2020-08-07
- Added support for RWK annotations.
- Improved DGD support.
- Fixed bug in RWK support that broke on
@@ -10,6 +10,7 @@
MultiTerm->add_by_term().
- Optimization by reducing calls to _offset().
- Introduced add_span() method to MultiTermToken.
+ - Removed deprecated 'primary' flag.
0.40 2020-03-03
- Fixed XIP parser.
diff --git a/lib/KorAP/XML/Batch/File.pm b/lib/KorAP/XML/Batch/File.pm
index 02d0bc8..6dc5d7b 100644
--- a/lib/KorAP/XML/Batch/File.pm
+++ b/lib/KorAP/XML/Batch/File.pm
@@ -20,7 +20,6 @@
anno => $param{anno} || [[]],
log => $param{log} || Mojo::Log->new(level => 'fatal'),
koral => $param{koral},
- primary => $param{primary},
non_word_tokens => $param{non_word_tokens},
non_verbal_tokens => $param{non_verbal_tokens},
pretty => $param{pretty},
@@ -76,8 +75,8 @@
my $file;
my $print_text = (
$self->{pretty} ?
- $tokens->to_pretty_json($self->{koral}, $self->{primary}) :
- $tokens->to_json($self->{koral}, $self->{primary})
+ $tokens->to_pretty_json($self->{koral}) :
+ $tokens->to_json($self->{koral})
);
# There is an output file given
@@ -201,11 +200,6 @@
A L<Mojo::Log> compatible log object.
-=item primary
-
-Export primary text associated with the document.
-Defaults to C<true>.
-
=item pretty
Pretty print the output JSON.
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index 6b5176a..ffd9f80 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -535,8 +535,7 @@
sub to_data {
my $self = shift;
- my $primary = defined $_[0] ? $_[0] : 1;
- my $version = defined $_[1] ? $_[1] : 0.03;
+ my $version = defined $_[0] ? $_[0] : 0.03;
# Legacy version
if ($version == 0) {
@@ -544,18 +543,15 @@
# Serialize meta fields
my %data = %{$self->doc->to_hash};
- my @fields;
- push(@fields, { primaryData => $self->doc->primary->data }) if $primary;
-
- push(@fields, {
+ $data{fields} = [{
+ primaryData => $self->doc->primary->data
+ },{
name => $self->name,
data => $self->stream->to_array,
tokenization => lc($self->foundry) . '#' . lc($self->layer),
foundries => $self->support,
layerInfo => $self->layer_info
- });
-
- $data{fields} = \@fields;
+ }];
return \%data;
}
@@ -568,7 +564,7 @@
my $tokens = $self->to_hash;
- $tokens->{text} = $self->doc->primary->data if $primary;
+ $tokens->{text} = $self->doc->primary->data;
$data{data} = $tokens;
$data{version} = '0.03';
@@ -584,7 +580,7 @@
$data{fields} = $self->doc->meta->to_koral_fields;
my $tokens = $self->to_hash;
- $tokens->{text} = $self->doc->primary->data if $primary;
+ $tokens->{text} = $self->doc->primary->data;
$data{data} = $tokens;
$data{version} = '0.4';
return \%data;
@@ -604,18 +600,18 @@
sub to_json_legacy {
- encode_json($_[0]->to_data($_[1], 0));
+ encode_json($_[0]->to_data(0));
};
sub to_json {
- my ($self, $version, $primary) = @_;
- encode_json($self->to_data($primary, $version));
+ my ($self, $version) = @_;
+ encode_json($self->to_data($version));
};
sub to_pretty_json {
- my ($self, $version, $primary) = @_;
- JSON::XS->new->pretty->encode($self->to_data($primary, $version));
+ my ($self, $version) = @_;
+ JSON::XS->new->pretty->encode($self->to_data($version));
};
diff --git a/script/korapxml2krill b/script/korapxml2krill
index a6074f6..791d115 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -193,7 +193,9 @@
'config|cfg=s' => \(my $cfg_file),
'log|l=s' => \($cfg{log}),
'anno|a=s' => \@anno,
- 'primary|p!' => \(my $primary),
+ 'primary|p!' => sub {
+ warn "Primary flag no longer supported!\n"; # trailing newline: no "at ... line N" suffix in this user-facing notice
+ },
'pretty|y' => \(my $pretty),
'jobs|j=i' => \($cfg{jobs}),
'koral|k=f' => \($cfg{koral}),
@@ -500,7 +502,6 @@
gzip => $gzip,
log => $log,
koral => ($cfg{koral} // $KORAL_VERSION),
- primary => $primary,
pretty => $pretty,
anno => \@filtered_anno,
non_word_tokens => ($cfg{non_word_tokens} // 0),
@@ -1153,13 +1154,6 @@
Can be set multiple times.
-=item B<--primary|-p>
-
-Output primary data or not. Defaults to C<true>.
-Can be flagged using C<--no-primary> as well.
-This is I<deprecated>.
-
-
=item B<--non-word-tokens|-nwt>
Tokenize non-word tokens like word tokens (defined as matching
diff --git a/t/batch_file.t b/t/batch_file.t
index 55c4d2c..12f7e12 100644
--- a/t/batch_file.t
+++ b/t/batch_file.t
@@ -93,7 +93,6 @@
# Check layer and foundry for base tokenization
-# No primary data
+# Primary data is always exported now
$bf->{anno} = [[]];
-$bf->{primary} = 0;
$bf->{foundry} = 'CoreNLP';
$bf->{layer} = 'Tokens';
@@ -102,7 +101,6 @@
ok($file = Mojo::File->new($output)->slurp, 'Slurp data');
ok($json = decode_json $file, 'decode json');
-ok(!$json->{data}->{text}, 'No Primary text');
is($json->{data}->{tokenSource}, 'corenlp#tokens', 'Title');
like($file, qr/^\{"/, 'No pretty printing');