Add pti to base
Change-Id: Ibdc100f05bdd13862cd3d6232978133d9f498245
diff --git a/t/index/base_paragraphs.t b/t/index/base_paragraphs.t
index 2abc198..f2dcfa9 100644
--- a/t/index/base_paragraphs.t
+++ b/t/index/base_paragraphs.t
@@ -4,6 +4,7 @@
use utf8;
use Test::More;
use Scalar::Util qw/weaken/;
+use Data::Dumper;
use_ok('KorAP::Document');
@@ -36,29 +37,14 @@
ok($tokens->add('Base', 'Paragraphs'), 'Add Structure');
+my $data = $tokens->to_data->{data};
+like($data->{foundries}, qr!base/paragraphs!, 'data');
+my $stream_head = $data->{stream}->[0];
+is($stream_head->[0], '-:base/paragraphs$<i>1', 'Number of paragraphs');
+is($stream_head->[1], '-:tokens$<i>18', 'Number of tokens');
+is($stream_head->[2], '<>:base/s:p$<b>64<i>0<i>129<i>17<b>1', 'Paragraph');
+is($stream_head->[3], '_0$<i>0<i>3', 'Position');
done_testing;
__END__
-
-
-
-
-
-done_testing;
-__END__
-
-
-sub new_tokenizer {
- my $x = $doc;
- weaken $x;
- return KorAP::Tokenizer->new(
- path => $x->path,
- doc => $x,
- foundry => 'DeReKo',
- layer => 'Structure',
- name => 'spans'
- )
-};
-
-__END__
diff --git a/t/index/base_sentences.t b/t/index/base_sentences.t
new file mode 100644
index 0000000..45c7ab3
--- /dev/null
+++ b/t/index/base_sentences.t
@@ -0,0 +1,60 @@
+#!/usr/bin/env perl
+# Test for the Base/Sentences annotation layer: loads the sample document,
+# tokenizes it and checks the first entry of the serialized token stream.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# The fixture lives next to this test file in corpus/doc/text
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Load Korap::Document');
+
+# Quote the path so that regex metacharacters in the checkout
+# directory (e.g. '+' or '(') are matched literally
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+ok(my $tokens = KorAP::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => 'OpenNLP',
+  layer => 'Tokens',
+  name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('Base', 'Sentences'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+
+like($data->{foundries}, qr!base/sentences!, 'data');
+
+# Expected terms of the first stream entry, in order.
+# NOTE(review): the payload encoding (<b>/<i> fields) is defined by the
+# KorAP serializer and is not visible here - confirm before changing values.
+is($data->{stream}->[0]->[0], '-:base/sentences$<i>1', 'Number of sentences');
+is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
+is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text');
+is($data->{stream}->[0]->[3], '<>:base/s:s$<b>64<i>0<i>129<i>17<b>2', 'Sentence');
+is($data->{stream}->[0]->[4], '_0$<i>0<i>3', 'Position');
+
+done_testing;
+
+__END__