Include field serialization in versioned JSON output

Change-Id: Ib48d28af2eaac4e550727f838903ebb0cf15bb74
diff --git a/lib/KorAP/XML/Batch/File.pm b/lib/KorAP/XML/Batch/File.pm
index 583bc8f..c982bdd 100644
--- a/lib/KorAP/XML/Batch/File.pm
+++ b/lib/KorAP/XML/Batch/File.pm
@@ -70,7 +70,7 @@
   };
 
   my $file;
-  my $print_text = ($self->{pretty} ? $tokens->to_pretty_json($self->{primary}) : $tokens->to_json($self->{primary}));
+  my $print_text = ($self->{pretty} ? $tokens->to_pretty_json(undef, $self->{primary}) : $tokens->to_json(undef, $self->{primary}));
 
   # There is an output file given
   if ($output) {
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index dec20cf..39e1fe0 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -377,7 +377,7 @@
     }
     else {
       my $perc = _perc(
-	$tokens->should, $tokens->have, $self->should, $self->should - $self->have
+        $tokens->should, $tokens->have, $self->should, $self->should - $self->have
       );
       $self->log->debug('With an alignment quota of ' . $perc);
     };
@@ -464,13 +464,13 @@
 
       # Get all layers
       foreach my $layer (@{$self->{support}->{$foundry}}) {
-	  my @layers = @$layer;
-	  push(@supports, $foundry . '/' . $layers[0]);
+        my @layers = @$layer;
+        push(@supports, $foundry . '/' . $layers[0]);
 
-	  # More information
-	  if ($layers[1]) {
-	      push(@supports, $foundry . '/' . join('/', @layers));
-	  };
+        # More information
+        if ($layers[1]) {
+          push(@supports, $foundry . '/' . join('/', @layers));
+        };
       };
     };
     return lc ( join ' ', sort {$a cmp $b } @supports );
@@ -488,14 +488,14 @@
 
 
 sub layer_info {
-    my $self = shift;
-    $self->{layer_info} //= [];
-    if ($_[0]) {
-	push(@{$self->{layer_info}}, @{$_[0]});
-    }
-    else {
-	return join ' ', sort {$a cmp $b } uniq @{$self->{layer_info}};
-    };
+  my $self = shift;
+  $self->{layer_info} //= [];
+  if ($_[0]) {
+    push(@{$self->{layer_info}}, @{$_[0]});
+  }
+  else {
+    return join ' ', sort {$a cmp $b } uniq @{$self->{layer_info}};
+  };
 };
 
 
@@ -531,12 +531,15 @@
 sub to_data {
   my $self = shift;
   my $primary = defined $_[0] ? $_[0] : 1;
-  my $legacy  = defined $_[1] ? $_[1] : 0;
+  my $version = defined $_[1] ? $_[1] : 0.03;
 
-  my %data = %{$self->doc->to_hash};
-  my @fields;
+  # Legacy version
+  if ($version == 0) {
 
-  if ($legacy) {
+    # Serialize meta fields
+    my %data = %{$self->doc->to_hash};
+
+    my @fields;
     push(@fields, { primaryData => $self->doc->primary->data }) if $primary;
 
     push(@fields, {
@@ -548,17 +551,39 @@
     });
 
     $data{fields} = \@fields;
+
+    return \%data;
   }
 
-  else {
+  # Version 0.03 serialization
+  elsif ($version == 0.03) {
+
+    # Serialize meta fields
+    my %data = %{$self->doc->to_hash};
+
     my $tokens = $self->to_hash;
 
     $tokens->{text} = $self->doc->primary->data if $primary;
     $data{data} = $tokens;
     $data{version} = '0.03';
-  };
 
-  \%data;
+    return \%data;
+  }
+
+  # Version 0.4 serialization
+  elsif ($version == 0.4) {
+    my %data = (
+      '@context' => 'http://korap.ids-mannheim.de/ns/koral/0.4/context.jsonld',
+      '@type' => 'koral:corpus'
+    );
+    $data{fields} = $self->doc->meta->to_koral_fields;
+
+    my $tokens = $self->to_hash;
+    $tokens->{text} = $self->doc->primary->data if $primary;
+    $data{data} = $tokens;
+    $data{version} = '0.4';
+    return \%data;
+  };
 };
 
 sub to_hash {
@@ -574,16 +599,18 @@
 
 
 sub to_json_legacy {
-  encode_json($_[0]->to_data($_[1], 1));
+  encode_json($_[0]->to_data($_[1], 0));
 };
 
 sub to_json {
-  encode_json($_[0]->to_data($_[1], 0));
+  my ($self, $version, $primary) = @_;
+  encode_json($self->to_data($primary, $version));
 };
 
 
 sub to_pretty_json {
-  JSON::XS->new->pretty->encode($_[0]->to_data($_[1]));
+  my ($self, $version, $primary) = @_;
+  JSON::XS->new->pretty->encode($self->to_data($primary, $version));
 };
 
 
diff --git a/t/meta_koral.t b/t/meta_koral.t
index 931f106..1de378d 100644
--- a/t/meta_koral.t
+++ b/t/meta_koral.t
@@ -47,8 +47,6 @@
 _contains_not($fields, 'textDomain');
 _contains_not($fields, 'keywords');
 
-# diag Dumper $fields;
-
 _contains_not($fields, 'subTitle');
 
 
diff --git a/t/tokenization.t b/t/tokenization.t
index 4c4ddaa..4e883d1 100644
--- a/t/tokenization.t
+++ b/t/tokenization.t
@@ -60,6 +60,16 @@
 like($tokens->stream->pos(14)->to_string, qr/s:Der/);
 
 
+my $json = decode_json $tokens->to_json;
+is($json->{docSigle}, 'WPD/AAA', 'DocSigle old');
+is($json->{author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author old');
+
+$json = decode_json $tokens->to_json(0.4);
+is($json->{fields}->[0]->{key}, 'corpusSigle');
+is($json->{fields}->[0]->{value}, 'WPD');
+is($json->{fields}->[6]->{key}, 'creationDate');
+is($json->{fields}->[6]->{value}, '2005');
+
 done_testing;
 
 __END__