Blame - t/index/base_paragraphs.t - KorAP/KorAP-XML-Krill

blob: 2abc19841e821c1c899adf630494c269acf0d99e [file] [log] [blame]

#!/usr/bin/env perl
# Test script: loads the sample document from corpus/doc/text (relative to
# this file), parses it, tokenizes it with the OpenNLP 'Tokens' layer and
# then calls add('Base', 'Paragraphs') on the resulting tokenizer.
use strict;
use warnings;
use utf8;
use Test::More;
use Scalar::Util qw/weaken/;

use_ok('KorAP::Document');

use File::Basename 'dirname';
use File::Spec::Functions 'catdir';

# Fixture directory, resolved relative to the location of this test file.
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');

ok(my $doc = KorAP::Document->new(
  path => $path . '/'
), 'Load Korap::Document');

# \Q...\E keeps the directory name literal inside the pattern, so regex
# metacharacters in the path (dots, plus signs, parentheses, backslash
# separators) cannot alter or break the match. \z anchors at the very end
# of the string; the braces avoid the `$!` lookalike of a `!` delimiter.
like($doc->path, qr{\Q$path\E/\z}, 'Path');
ok($doc->parse, 'Parse document');

ok($doc->primary->data, 'Primary data in existence');
is($doc->primary->data_length, 129, 'Data length');

use_ok('KorAP::Tokenizer');

# Tokenizer bound to the parsed document and its path.
ok(my $tokens = KorAP::Tokenizer->new(
  path    => $doc->path,
  doc     => $doc,
  foundry => 'OpenNLP',
  layer   => 'Tokens',
  name    => 'tokens'
), 'New Tokenizer');

ok($tokens->parse, 'Parse');

ok($tokens->add('Base', 'Paragraphs'), 'Add Structure');


done_testing;

__END__
				43
				44
				45
				46
				47
				48	done_testing;
				49	__END__
				50
				51
				52	sub new_tokenizer {
				53	my $x = $doc;
				54	weaken $x;
				55	return KorAP::Tokenizer->new(
				56	path => $x->path,
				57	doc => $x,
				58	foundry => 'DeReKo',
				59	layer => 'Structure',
				60	name => 'spans'
				61	)
				62	};
				63
				64	__END__