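Fixed pagebreak treatment in script

Lowercase the base pagebreaks value (-bpb) before it is compared with
'dereko#structure', as is already done for the base sentences and base
paragraphs values, so that e.g. 'DeReKo#Structure' is recognized.
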
Change-Id: I370a600ea161ae0b03fd36891d6959f8ba8bf359
diff --git a/script/korapxml2krill b/script/korapxml2krill
index fab6147..43ac47a 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -144,6 +144,7 @@
$base_sentences = lc $base_sentences;
$base_paragraphs = lc $base_paragraphs;
+$base_pagebreaks = lc $base_pagebreaks;
my %ERROR_HASH = (
-sections => 'NAME|SYNOPSIS|ARGUMENTS|OPTIONS',
@@ -197,6 +198,7 @@
if ($base_paragraphs eq 'dereko#structure') {
push @dereko_attr, 'paragraphs';
};
+
if ($base_pagebreaks eq 'dereko#structure') {
push @dereko_attr, 'pagebreaks';
};
diff --git a/t/script/single.t b/t/script/single.t
index 486e1b6..a1d6c58 100644
--- a/t/script/single.t
+++ b/t/script/single.t
@@ -204,6 +204,42 @@
is($json->{keywords}, 'sgbrKodex:T', 'keywords');
is($json->{publisher}, 'Dorfblatt GmbH', 'publisher');
+
+
+# AGA with base info (sentences, paragraphs and pagebreaks given as 'DeReKo#Structure')
+unlink $output;
+ok(!-f $output, 'Output does not exist');
+$input = catdir($f, '..', 'corpus', 'GOE2', 'AGA', '03828');
+ok(-d $input, 'Input directory found');
+
+ok(!-f $output, 'Output does not exist');
+
+$call = join(
+ ' ',
+ 'perl', $script,
+ '--input' => $input,
+ '--output' => $output,
+ '-t' => 'base#tokens_aggr',
+ '-bs' => 'DeReKo#Structure',
+ '-bp' => 'DeReKo#Structure',
+ '-bpb' => 'DeReKo#Structure',
+ '-l' => 'INFO'
+);
+
+stderr_like(
+ sub {
+ system($call);
+ },
+ qr!The code took!,
+ $call
+);
+ok(-f $output, 'Output does exist');
+ok(($file = Mojo::File->new($output)->slurp), 'Slurp data');
+ok(($json = decode_json $file), 'decode json');
+
+is($json->{title}, 'Autobiographische Einzelheiten', 'title');
+is($json->{data}->{stream}->[0]->[-1], '~:base/s:pb$<i>529<i>0', 'Pagebreak annotation');
+
done_testing;
__END__