Blame - t/script/single.t - KorAP/KorAP-XML-Krill

blob: cda1b57e95d87a96a9b2a81debf549d2a4b806db [file] [log] [blame]

#!/usr/bin/env perl
# Integration test for the korapxml2krill command line script.
#
# The script is run against a single document of the test corpus and the
# resulting JSON file is inspected in three scenarios:
#   1. plain output,
#   2. gzip-compressed output (-z),
#   3. a different token source with all annotations skipped except two.
use strict;
use warnings;
use File::Basename 'dirname';
use File::Spec::Functions qw/catdir catfile/;
use File::Temp qw/ :POSIX /;
use Mojo::Util qw/slurp/;
use Mojo::JSON qw/decode_json/;
use IO::Uncompress::Gunzip qw/$GunzipError/;
use Test::More;
use Test::Output;
use Data::Dumper;

# Foundry list expected when no annotation layer is skipped.
my $ALL_FOUNDRIES = 'base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/constituency corenlp/morpho corenlp/sentences dereko dereko/structure glemm glemm/morpho mate mate/dependency mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences';

# Assertions shared by the plain and the compressed run, both of which use
# OpenNLP#Tokens as token source and keep every annotation layer.
#   $json - decoded output document (hash reference)
sub check_full_conversion {
  my ($json) = @_;

  # Report failures at the caller's line instead of inside this helper
  local $Test::Builder::Level = $Test::Builder::Level + 1;

  is($json->{textType}, 'Zeitung: Tageszeitung', 'text type');
  is($json->{title}, 'Beispiel Text', 'Title');
  is($json->{data}->{tokenSource}, 'opennlp#tokens', 'TokenSource');
  is($json->{data}->{foundries}, $ALL_FOUNDRIES, 'Foundries');
  like($json->{data}->{text}, qr/^Zum letzten kulturellen/, 'Text');
  is($json->{data}->{stream}->[0]->[0], '-:base/paragraphs$<i>1', 'Paragraphs');
  return;
}

my $f = dirname(__FILE__);
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');
ok(-f $script, 'Script found');

# Without any arguments the script prints its usage message.
# $^X is the interpreter running this test, so the script is executed by the
# same perl instead of whichever "perl" happens to be first in PATH.
stdout_like(
  sub { system($^X, $script) },
  qr!Usage.+?korapxml2krill!s,
  'Usage output'
);

my $input = catdir($f, '..', 'annotation', 'corpus', 'doc', '0001');
ok(-d $input, 'Input directory found');

my $output = tmpnam();

ok(!-f $output, 'Output does not exist');

# The command is kept as a list and passed to system() in list form, so that
# paths containing whitespace or shell metacharacters reach the script intact.
my @call = (
  $^X, $script,
  '--input'  => $input,
  '--output' => $output,
  '-t'       => 'OpenNLP#Tokens',
  '-l'       => 'INFO'
);

# Test without compression
stderr_like(
  sub { system(@call) },
  qr!The code took!,
  join(' ', @call)
);

ok(-f $output, 'Output does exist');
my $file = slurp $output;
ok($file, 'Slurp data');
my $json = decode_json $file;
ok($json, 'decode json');
check_full_conversion($json);

# Delete output
unlink $output;
ok(!-f $output, 'Output does not exist');

push @call, '-z';

# Test with compression
stderr_like(
  sub { system(@call) },
  qr!The code took!,
  join(' ', @call)
);

ok(-f $output, 'Output does exist');

# Uncompress the data using a buffer.
# Transparent => 0 makes the constructor fail on data that is not gzip,
# instead of silently passing it through unchanged.
my $gz = IO::Uncompress::Gunzip->new($output, Transparent => 0);
ok($gz, 'Gunzip stream opened') or diag $GunzipError;

$file = '';
if ($gz) {
  my $buffer = '';
  my $status;

  # read() returns the number of bytes read, 0 at EOF and a negative value
  # on error - loop only while data arrives, so an error cannot spin forever
  while (($status = $gz->read($buffer)) > 0) {
    $file .= $buffer;
  }
  is($status, 0, 'Compressed output read completely') or diag $GunzipError;
  $gz->close;
}

ok($json = decode_json($file), 'decode json');
check_full_conversion($json);

# Delete output
unlink $output;
ok(!-f $output, 'Output does not exist');

# Use a different token source and skip all annotations,
# except for DeReKo#Structure and Mate#Dependency
@call = (
  $^X, $script,
  '--input'  => $input,
  '--output' => $output,
  '-t'       => 'CoreNLP#Tokens',
  '-s'       => '#all',
  '-a'       => 'DeReKo#Structure',
  '-a'       => 'Mate#Dependency',
  '-l'       => 'INFO'
);

stderr_like(
  sub { system(@call) },
  qr!The code took!,
  join(' ', @call)
);

ok(-f $output, 'Output does exist');
ok(($file = slurp $output), 'Slurp data');
ok(($json = decode_json $file), 'decode json');

is($json->{textType}, 'Zeitung: Tageszeitung', 'text type');
is($json->{title}, 'Beispiel Text', 'Title');
is($json->{data}->{tokenSource}, 'corenlp#tokens', 'TokenSource');
is($json->{data}->{foundries}, 'dereko dereko/structure mate mate/dependency', 'Foundries');

like($json->{data}->{text}, qr/^Zum letzten kulturellen/, 'Text');
is($json->{data}->{stream}->[0]->[0], '-:tokens$<i>20', 'Tokens');

# Do not leave the temporary output of the last run behind
unlink $output;

# TODO: Test overwrite
# TODO: Test meta
# TODO: Test sigle
# TODO: Test help
# TODO: Test version

done_testing;
__END__