blob: 95ef8904f85c543ce942805a717e9078e0f4903e [file] [log] [blame]
Nils Diewald98767bb2014-04-25 20:31:19 +00001#!/usr/bin/env perl
2# source ~/perl5/perlbrew/etc/bashrc
3# perlbrew switch perl-blead@korap
4use strict;
5use warnings;
6use utf8;
7use Test::More;
8use Benchmark ':hireswallclock';
9use lib 'lib', '../lib';
10use Scalar::Util qw/weaken/;
11
12use File::Basename 'dirname';
13use File::Spec::Functions 'catdir';
14
15use_ok('KorAP::Document');
16
# Turn the string serialization of a token into the set of its terms, so that
# two tokens can be compared regardless of the order their terms appear in.
#
# Takes a string such as '[(0-3)a|b]' and returns a hash reference that maps
# every pipe separated term to 1, e.g. { a => 1, b => 1 }.
sub _t2h {
  my $string = shift;

  # Strip the enclosing brackets and the leading "(start-end)" offsets
  $string =~ s/^\[\(\d+?-\d+?\)(.+?)\]$/$1/;

  # Each pipe separated term becomes a key of the set
  my %hash = map { $_ => 1 } split(qr!\|!, $string);
  return \%hash;
};
27
28
# Load the 'artificial' test document located next to this test file
my $path = catdir(dirname(__FILE__), 'artificial');
ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');

# The path is quoted, so regex metacharacters in a directory name
# (e.g. a dot or a plus sign) are matched literally
like($doc->path, qr!\Q$path\E/$!, 'Path');
ok($doc->parse, 'Parse document');
33
# Build a new KorAP::Tokenizer for the file-level $doc, using the foundry
# 'OpenNLP', the layer 'Tokens' and the name 'tokens'.
# Returns the tokenizer object; parse is not called here.
sub new_tokenizer {

  # Only this local copy of the reference is weakened,
  # the file-level $doc itself stays a strong reference
  my $x = $doc;
  weaken $x;

  return KorAP::Tokenizer->new(
    path    => $x->path,
    doc     => $x,
    foundry => 'OpenNLP',
    layer   => 'Tokens',
    name    => 'tokens'
  );
};
45
# The primary data is the plain text of the document
is($doc->primary->data,
   'Zum letzten kulturellen Anlass lädt die Leitung des Schulheimes Hofbergli ein, '.
     'bevor der Betrieb Ende Schuljahr eingestellt wird.', 'Primary data');

is($doc->primary->data_length, 129, 'Primary data length');

# Called with two offsets, data returns the text in between
is($doc->primary->data(0,3), 'Zum', 'Get primary data');
53
# Get tokens
use_ok('KorAP::Tokenizer');

# Get tokenization
ok(my $tokens = KorAP::Tokenizer->new(
  path    => $doc->path,
  doc     => $doc,
  foundry => 'OpenNLP',
  layer   => 'Tokens',
  name    => 'tokens'
), 'New Tokenizer');
ok($tokens->parse, 'Parse');

# Check the accessors of the parsed tokenizer
is($tokens->foundry, 'OpenNLP', 'Foundry');

is($tokens->doc->text_sigle, 'ART_ABC.00001', 'Doc id');
is($tokens->should, 20, 'Should');
is($tokens->have, 18, 'Have');
is($tokens->name, 'tokens', 'Name');
is($tokens->layer, 'Tokens', 'Layer');

# Check the serialization of the first two tokens
is($tokens->stream->pos(0)->to_string, '[(0-3)-:tokens$<i>18|_0#0-3|i:zum|s:Zum]', 'Token is correct');

is($tokens->stream->pos(1)->to_string, '[(4-11)_1#4-11|i:letzten|s:letzten]', 'Token is correct');
Nils Diewald98767bb2014-04-25 20:31:19 +000077
# Check the remaining tokens (positions 2 to 17): each entry holds the
# start offset, the end offset and the surface form of one token
my $i = 2;
foreach my $token ([12,23, 'kulturellen'],
                   [24,30, 'Anlass'],
                   [31,35, 'lädt'],
                   [36,39, 'die'],
                   [40,47, 'Leitung'],
                   [48,51, 'des'],
                   [52,63, 'Schulheimes'],
                   [64,73, 'Hofbergli'],
                   [74,77, 'ein'],
                   [79,84, 'bevor'],
                   [85,88, 'der'],
                   [89,96, 'Betrieb'],
                   [97,101, 'Ende'],
                   [102,111, 'Schuljahr'],
                   [112,123, 'eingestellt'],
                   [124,128, 'wird']
                 ) {
  my ($start, $end, $surface) = @$token;

  # Expected serialization: offsets, position term, lowercased and plain surface
  is($tokens->stream->pos($i)->to_string,
     sprintf('[(%s-%s)_%s#%s-%s|i:%s|s:%s]',
             $start, $end, $i, $start, $end, lc($surface), $surface),
     'Token is correct');
  $i++;
};

# The position after the last token must not exist
ok(!$tokens->stream->pos($i++), 'No more tokens');
104
# Add OpenNLP/morpho
ok($tokens->add('OpenNLP', 'Morpho'), 'Add OpenNLP/Morpho');

# One expected part-of-speech tag per token, in token order
$i = 0;
foreach my $pos_tag (qw/APPRART ADJA ADJA NN VVFIN ART NN ART NN NE PTKVZ KOUS ART NN NN NN VVPP VAFIN/) {
  like($tokens->stream->pos($i++)->to_string,
       qr!\|opennlp/p:$pos_tag!,
       'Annotation (OpenNLP/p) is correct: ' . $pos_tag
     );
};

# Add OpenNLP/sentences
ok($tokens->add('OpenNLP', 'Sentences'), 'Add OpenNLP/Sentences');

# The first token now also carries the sentence span and the sentence count
is($tokens->stream->pos(0)->to_string,
   '[(0-3)-:opennlp/sentences$<i>1|-:tokens$<i>18|<>:opennlp/s:s#0-129$<i>17<b>0|_0#0-3|i:zum|opennlp/p:APPRART|s:Zum]',
   # '[(0-3)-:opennlp/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|opennlp/p:APPRART|<>:opennlp/s:s#0-129$<i>17]',
   'Correct sentence'
 );
Nils Diewald98767bb2014-04-25 20:31:19 +0000124
# New instantiation (same constructor arguments as in new_tokenizer)
ok($tokens = new_tokenizer(), 'New Tokenizer');

ok($tokens->parse, 'Parse');

# Add Base/Sentences
ok($tokens->add('Base', 'Sentences'), 'Add Base/Sentences');

# Add Base/Paragraphs
ok($tokens->add('Base', 'Paragraphs'), 'Add Base/Paragraphs');

# Compare the terms of the first token as a set, independent of their order
is_deeply(
  _t2h($tokens->stream->pos(0)->to_string),
  _t2h('[(0-3)-:base/paragraphs$<i>1|-:base/sentences$<i>1|-:tokens$<i>18|<>:base/s:t#0-129$<i>17<b>0|<>:base/s:p#0-129$<i>17<b>1|<>:base/s:s#0-129$<i>17<b>2|_0#0-3|i:zum|s:Zum]'),
  'Correct base annotation');
146
# New instantiation
ok($tokens = new_tokenizer()->parse, 'Parse');

# Add CoreNLP/NamedEntities
ok($tokens->add('CoreNLP', 'NamedEntities', 'ne_dewac_175m_600'), 'Add CoreNLP/NamedEntities');
ok($tokens->add('CoreNLP', 'NamedEntities', 'ne_hgc_175m_600'), 'Add CoreNLP/NamedEntities');

# [(64-73)s:Hofbergli|i:hofbergli|_9#64-73|corenlp/ne_dewac_175m_600:I-LOC|corenlp/ne_hgc_175m_600:I-LOC]
# Compare the terms of the token at position 9 as a set, independent of their order
is_deeply(
  _t2h($tokens->stream->pos(9)->to_string),
  _t2h('[(64-73)_9#64-73|corenlp/ne:I-LOC|i:hofbergli|s:Hofbergli]'),
  'Correct NamedEntities annotation'
);
Nils Diewald98767bb2014-04-25 20:31:19 +0000160
# New instantiation
ok($tokens = new_tokenizer()->parse, 'Parse');

# Add CoreNLP/Morpho
ok($tokens->add('CoreNLP', 'Morpho'), 'Add CoreNLP/Morpho');

is_deeply(
  _t2h($tokens->stream->pos(0)->to_string),
  _t2h('[(0-3)-:tokens$<i>18|_0#0-3|corenlp/p:APPRART|i:zum|s:Zum]'),
  'Correct corenlp annotation'
);

# One expected part-of-speech tag per token, in token order.
# NOTE(review): the pattern is not anchored at its end, so 'ADJ' also matches
# an 'ADJA' annotation - confirm that 'ADJ' is the intended tag for token 1.
$i = 0;
foreach my $pos_tag (qw/APPRART ADJ ADJA NN VVFIN ART NN ART NN NE PTKVZ KOUS ART NN NN NN VVPP VAFIN/) {
  like($tokens->stream->pos($i++)->to_string,
       qr!\|corenlp/p:$pos_tag!,
       'Annotation (CoreNLP/p) is correct: '. $pos_tag);
};


# Add CoreNLP/Sentences
ok($tokens->add('CoreNLP', 'Sentences'), 'Add CoreNLP/Sentences');

# Compare the terms of the first token as a set, independent of their order
is_deeply(
  _t2h($tokens->stream->pos(0)->to_string),
  _t2h('[(0-3)-:corenlp/sentences$<i>1|-:tokens$<i>18|<>:corenlp/s:s#0-129$<i>17<b>0|_0#0-3|corenlp/p:APPRART|i:zum|s:Zum]'),
  # '[(0-3)-:corenlp/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|corenlp/p:APPRART|<>:corenlp/s:s#0-129$<i>17]',
  'Correct corenlp annotation'
);
Nils Diewald6a2a14b2015-06-17 20:34:24 +0000190
# New instantiation
ok($tokens = new_tokenizer()->parse, 'New Tokenizer');

# Add Connexor/Sentences
ok($tokens->add('Connexor', 'Sentences'), 'Add Connexor/Sentences');

# Compare the terms of the first token as a set, independent of their order
is_deeply(
  _t2h($tokens->stream->pos(0)->to_string),
  _t2h('[(0-3)-:cnx/sentences$<i>1|-:tokens$<i>18|<>:cnx/s:s#0-129$<i>17<b>0|_0#0-3|i:zum|s:Zum]'),
  # '[(0-3)-:cnx/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|<>:cnx/s:s#0-129$<i>17<b>0]',
  'Correct cnx annotation'
);
Nils Diewald98767bb2014-04-25 20:31:19 +0000203
# New instantiation
ok($tokens = new_tokenizer()->parse, 'New Tokenizer');

# Add Connexor/Morpho
ok($tokens->add('Connexor', 'Morpho'), 'Add Connexor/Morpho');

# One expected part-of-speech tag per token, in token order;
# a '!' entry skips the token at that position without checking it
$i = 0;
foreach my $pos_tag (qw/! A A N V DET N DET N N NUM CS DET N N N V V/) {
  if ($pos_tag eq '!') {
    $i++;
    next;
  };
  like($tokens->stream->pos($i++)->to_string,
       qr!\|cnx/p:$pos_tag!,
       'Annotation (Connexor/p) is correct: ' . $pos_tag);
};


# Morphological features per token: several features of one token are
# joined by ':' and checked one by one, '!' again skips the token
$i = 0;
foreach my $features (qw/! ! ! ! IND:PRES ! ! ! ! Prop ! ! ! ! ! ! PCP:PERF IND:PRES/) {
  if ($features eq '!') {
    $i++;
    next;
  };
  foreach my $f (split(':', $features)) {
    like($tokens->stream->pos($i)->to_string,
         qr!\|cnx/m:$f!,
         'Annotation (Connexor/m) is correct: '. $f);
  };
  $i++;
};
235
# New instantiation
ok($tokens = new_tokenizer()->parse, 'New Tokenizer');

# Add Connexor/Phrase
ok($tokens->add('Connexor', 'Phrase'), 'Add Connexor/Phrase');
my $stream = $tokens->stream;

# Noun phrase spans are expected on the tokens at positions 1, 6, 8 and 13
like($stream->pos(1)->to_string, qr!<>:cnx/c:np#4-30\$<i>4<b>0!, 'Annotation (Connexor/c) is correct');
like($stream->pos(6)->to_string, qr!<>:cnx/c:np#40-47\$<i>7<b>0!, 'Annotation (Connexor/c) is correct');
like($stream->pos(8)->to_string, qr!<>:cnx/c:np#52-73\$<i>10<b>0!, 'Annotation (Connexor/c) is correct');
like($stream->pos(13)->to_string, qr!<>:cnx/c:np#89-111\$<i>16<b>0!, 'Annotation (Connexor/c) is correct');
Nils Diewald21a3e1a2014-04-28 18:48:16 +0000246
# New instantiation
ok($tokens = new_tokenizer()->parse, 'New Tokenizer');

# Add Connexor/Syntax
ok($tokens->add('Connexor', 'Syntax'), 'Add Connexor/Syntax');
$stream = $tokens->stream;

# One expected syntax tag per token, in token order;
# a '!' entry skips the token at that position without checking it
$i = 0;
foreach my $syntax_tag (qw/! @PREMOD @PREMOD @NH @MAIN @PREMOD @NH @PREMOD
                           @PREMOD @NH @NH @PREMARK @PREMOD @PREMOD @NH @NH @MAIN @AUX/) {
  if ($syntax_tag eq '!') {
    $i++;
    next;
  };
  like($stream->pos($i++)->to_string,
       qr!\|cnx/syn:$syntax_tag!,
       'Annotation (Connexor/syn) is correct: ' . $syntax_tag);
};
265
# New instantiation
ok($tokens = new_tokenizer()->parse, 'New Tokenizer');

# Add XIP/Sentences
ok($tokens->add('XIP', 'Sentences'), 'Add XIP/Sentences');

# Compare the terms of the first token as a set, independent of their order
is_deeply(
  _t2h($tokens->stream->pos(0)->to_string),
  _t2h('[(0-3)-:tokens$<i>18|-:xip/sentences$<i>1|<>:xip/s:s#0-129$<i>17<b>0|_0#0-3|i:zum|s:Zum]'),
  # '[(0-3)-:tokens$<i>18|_0#0-3|i:zum|s:Zum|-:xip/sentences$<i>1|<>:xip/s:s#0-129$<i>17<b>0]',
  'First sentence'
);

# Add XIP/Morpho
ok($tokens->add('XIP', 'Morpho'), 'Add XIP/Morpho');
$stream = $tokens->stream;

# One expected part-of-speech tag per token, in token order
$i = 0;
foreach my $pos_tag (qw/PREP ADJ ADJ NOUN VERB DET NOUN DET NOUN NOUN PTCL CONJ DET NOUN NOUN NOUN VERB VERB/) {
  like($stream->pos($i++)->to_string,
       qr!\|xip/p:$pos_tag!,
       'Annotation (xip/p) is correct: ' . $pos_tag);
};

# Expected lemmas for the first 16 tokens; several lemmas of one token are
# joined by ':' and each of them is matched literally
$i = 0;
foreach my $lemmas ('zu', 'letzt', 'kulturell', 'Anlass', '=laden:laden', 'die', 'Leitung', 'der', '\#schulen:\#Heim:schulen\#Heim', 'Hofbergli', 'ein', 'bevor', 'der', 'Betrieb', 'Ende', '\#schulen:\#Jahr:schulen\#Jahr') {
  foreach my $f (split(':', $lemmas)) {
    like($stream->pos($i)->to_string,
         qr!\|xip\/l:\Q$f\E!,
         'Annotation (xip/l) is correct: ' . $f);
  };
  $i++;
};
307
# New instantiation
ok($tokens = new_tokenizer()->parse, 'New Tokenizer');

# Add XIP/Dependency
ok($tokens->add('XIP', 'Dependency'), 'Add XIP/Dependency');

$stream = $tokens->stream;

# Print the serialized token for debugging; note is only shown in verbose runs
note $stream->pos(1)->to_string;

# The relation terms must not be preceded by '<', i.e. not be part of a '<>:' term
like($stream->pos(1)->to_string, qr![^<]>:xip/d:NMOD\$<i>3!, 'Dependency fine');
like($stream->pos(3)->to_string, qr![^<]<:xip/d:NMOD\$<i>1!, 'Dependency fine');

done_testing;
__END__
322
323
Nils Diewald21a3e1a2014-04-28 18:48:16 +0000324like($stream->pos(3)->to_string, qr!\|<:xip/d:NMOD\$<i>2!, 'Dependency fine');
325like($stream->pos(4)->to_string, qr!\|>xip/d:VMAIN\$<i>4!, 'Dependency fine');
326like($stream->pos(4)->to_string, qr!\|<:xip/d:SUBJ\$<i>6!, 'Dependency fine');
327like($stream->pos(4)->to_string, qr!\|<:xip/d:VPREF\$<i>10!, 'Dependency fine');
328like($stream->pos(5)->to_string, qr!\|>:xip/d:DETERM\$<i>6!, 'Dependency fine');
329like($stream->pos(6)->to_string, qr!\|<:xip/d:DETERM\$<i>5!, 'Dependency fine');
330like($stream->pos(6)->to_string, qr!\|>:xip/d:SUBJ\$<i>4!, 'Dependency fine');
331like($stream->pos(6)->to_string, qr!\|<:xip/d:NMOD\$<i>8!, 'Dependency fine');
332like($stream->pos(7)->to_string, qr!\|>:xip/d:DETERM\$<i>8!, 'Dependency fine');
333like($stream->pos(8)->to_string, qr!\|<:xip/d:DETERM\$<i>7!, 'Dependency fine');
334like($stream->pos(8)->to_string, qr!\|>:xip/d:NMOD\$<i>6!, 'Dependency fine');
335like($stream->pos(8)->to_string, qr!\|<:xip/d:NMOD\$<i>9!, 'Dependency fine');
336like($stream->pos(9)->to_string, qr!\|>:xip/d:NMOD\$<i>8!, 'Dependency fine');
337like($stream->pos(10)->to_string, qr!\|>:xip/d:VPREF\$<i>4!, 'Dependency fine');
338like($stream->pos(11)->to_string, qr!\|>:xip/d:CONNECT\$<i>16!, 'Dependency fine');
339like($stream->pos(12)->to_string, qr!\|>:xip/d:DETERM\$<i>13!, 'Dependency fine');
340like($stream->pos(13)->to_string, qr!\|<:xip/d:DETERM\$<i>12!, 'Dependency fine');
341like($stream->pos(13)->to_string, qr!\|>:xip/d:SUBJ\$<i>16!, 'Dependency fine');
342like($stream->pos(14)->to_string, qr!\|>:xip/d:OBJ\$<i>16!, 'Dependency fine');
343like($stream->pos(15)->to_string, qr!\|>:xip/d:OBJ\$<i>16!, 'Dependency fine');
344like($stream->pos(16)->to_string, qr!\|<:xip/d:CONNECT\$<i>11!, 'Dependency fine');
345like($stream->pos(16)->to_string, qr!\|<:xip/d:SUBJ\$<i>13!, 'Dependency fine');
346like($stream->pos(16)->to_string, qr!\|<:xip/d:OBJ\$<i>14!, 'Dependency fine');
347like($stream->pos(16)->to_string, qr!\|<:xip/d:OBJ\$<i>15!, 'Dependency fine');
348like($stream->pos(16)->to_string, qr!\|>:xip/d:AUXIL\$<i>17!, 'Dependency fine');
349like($stream->pos(16)->to_string, qr!\|>xip/d:VMAIN\$<i>16!, 'Dependency fine');
350like($stream->pos(16)->to_string, qr!\|<xip/d:VMAIN\$<i>16!, 'Dependency fine');
351like($stream->pos(17)->to_string, qr!\|<:xip/d:AUXIL\$<i>16!, 'Dependency fine');
352
Nils Diewald47c3ef32014-04-30 19:13:17 +0000353# New instantiation
354ok($tokens = new_tokenizer->parse, 'New Tokenizer');
355
# Add XIP/Constituency
357ok($tokens->add('XIP', 'Constituency'), 'Add XIP/Constituency');
358
359$stream = $tokens->stream;
360like($stream->pos(0)->to_string, qr!\|<>:xip/c:TOP#0-129\$<i>17!, 'Constituency fine');
361like($stream->pos(0)->to_string, qr!\|<>:xip/c:MC#0-129\$<i>17<b>1!, 'Constituency fine');
362like($stream->pos(0)->to_string, qr!\|<>:xip/c:PP#0-30\$<i>4<b>2!, 'Constituency fine');
363like($stream->pos(0)->to_string, qr!\|<>:xip/c:PREP#0-3\$<i>1!, 'Constituency fine');
364
365like($stream->pos(1)->to_string, qr!\|<>:xip/c:NP#4-30\$<i>4<b>3!, 'Constituency fine');
366like($stream->pos(1)->to_string, qr!\|<>:xip/c:NPA#4-30\$<i>4<b>4!, 'Constituency fine');
367like($stream->pos(1)->to_string, qr!\|<>:xip/c:AP#4-11\$<i>2<b>5!, 'Constituency fine');
368like($stream->pos(1)->to_string, qr!\|<>:xip/c:ADJ#4-11\$<i>2<b>6!, 'Constituency fine');
369
370like($stream->pos(2)->to_string, qr!\|<>:xip/c:AP#12-23\$<i>3<b>5!, 'Constituency fine');
371like($stream->pos(2)->to_string, qr!\|<>:xip/c:ADJ#12-23\$<i>3<b>6!, 'Constituency fine');
372
373like($stream->pos(3)->to_string, qr!\|<>:xip/c:NOUN#24-30\$<i>4<b>5!, 'Constituency fine');
374
375like($stream->pos(4)->to_string, qr!\|<>:xip/c:VERB#31-35\$<i>5<b>2!, 'Constituency fine');
376
377like($stream->pos(5)->to_string, qr!\|<>:xip/c:NP#36-47\$<i>7<b>2!, 'Constituency fine');
378like($stream->pos(5)->to_string, qr!\|<>:xip/c:DET#36-39\$<i>6<b>3!, 'Constituency fine');
379
380like($stream->pos(6)->to_string, qr!\|<>:xip/c:NPA#40-47\$<i>7<b>3!, 'Constituency fine');
381like($stream->pos(6)->to_string, qr!\|<>:xip/c:NOUN#40-47\$<i>7<b>4!, 'Constituency fine');
382
383like($stream->pos(7)->to_string, qr!\|<>:xip/c:NP#48-63\$<i>9<b>2!, 'Constituency fine');
384like($stream->pos(7)->to_string, qr!\|<>:xip/c:DET#48-51\$<i>8<b>3!, 'Constituency fine');
385
386like($stream->pos(8)->to_string, qr!\|<>:xip/c:NPA#52-63\$<i>9<b>3!, 'Constituency fine');
387like($stream->pos(8)->to_string, qr!\|<>:xip/c:NOUN#52-63\$<i>9<b>4!, 'Constituency fine');
388
389like($stream->pos(9)->to_string, qr!\|<>:xip/c:NP#64-73\$<i>10<b>2!, 'Constituency fine');
390like($stream->pos(9)->to_string, qr!\|<>:xip/c:NPA#64-73\$<i>10<b>3!, 'Constituency fine');
391like($stream->pos(9)->to_string, qr!\|<>:xip/c:NOUN#64-73\$<i>10<b>4!, 'Constituency fine');
392
393like($stream->pos(10)->to_string, qr!\|<>:xip/c:PTCL#74-77\$<i>11<b>2!, 'Constituency fine');
394
395like($stream->pos(11)->to_string, qr!\|<>:xip/c:SC#79-128\$<i>18!, 'Constituency fine');
396like($stream->pos(11)->to_string, qr!\|<>:xip/c:CONJ#79-84\$<i>12<b>1!, 'Constituency fine');
397
398like($stream->pos(12)->to_string, qr!\|<>:xip/c:NP#85-96\$<i>14<b>1!, 'Constituency fine');
399like($stream->pos(12)->to_string, qr!\|<>:xip/c:DET#85-88\$<i>13<b>2!, 'Constituency fine');
400
401
402like($stream->pos(13)->to_string, qr!\|<>:xip/c:NPA#89-96\$<i>14<b>2!, 'Constituency fine');
403like($stream->pos(13)->to_string, qr!\|<>:xip/c:NOUN#89-96\$<i>14<b>3!, 'Constituency fine');
404
405like($stream->pos(14)->to_string, qr!\|<>:xip/c:NP#97-101\$<i>15<b>1!, 'Constituency fine');
406like($stream->pos(14)->to_string, qr!\|<>:xip/c:NPA#97-101\$<i>15<b>2!, 'Constituency fine');
407like($stream->pos(14)->to_string, qr!\|<>:xip/c:NOUN#97-101\$<i>15<b>3!, 'Constituency fine');
408
409like($stream->pos(15)->to_string, qr!\|<>:xip/c:NP#102-111\$<i>16<b>1!, 'Constituency fine');
410like($stream->pos(15)->to_string, qr!\|<>:xip/c:NPA#102-111\$<i>16<b>2!, 'Constituency fine');
411like($stream->pos(15)->to_string, qr!\|<>:xip/c:NOUN#102-111\$<i>16<b>3!, 'Constituency fine');
412
413like($stream->pos(16)->to_string, qr!\|<>:xip/c:VERB#112-123\$<i>17<b>1!, 'Constituency fine');
414
415like($stream->pos(17)->to_string, qr!\|<>:xip/c:VERB#124-128\$<i>18<b>1!, 'Constituency fine');
416
417# diag $stream->to_string;
418
Nils Diewald21a3e1a2014-04-28 18:48:16 +0000419
420# ADJA ADJA NN VVFIN ART NN ART NN NE PTKVZ KOUS ART NN NN NN VVPP VAFIN
421done_testing;
422__END__
Nils Diewald98767bb2014-04-25 20:31:19 +0000423
424
425# Todo: CoreNLP/Constituency!
Nils Diewald98767bb2014-04-25 20:31:19 +0000426
427
Nils Diewald98767bb2014-04-25 20:31:19 +0000428
429
430
431# Connexor
432push(@layers, ['Connexor', 'Morpho']);
433push(@layers, ['Connexor', 'Syntax']);
434push(@layers, ['Connexor', 'Phrase']);
435push(@layers, ['Connexor', 'Sentences']);
436
437# TreeTagger
438push(@layers, ['TreeTagger', 'Morpho']);
439push(@layers, ['TreeTagger', 'Sentences']);
440
441# Mate
442# push(@layers, ['Mate', 'Morpho']);
443push(@layers, ['Mate', 'Dependency']);
444
445# XIP
446push(@layers, ['XIP', 'Morpho']);
447push(@layers, ['XIP', 'Constituency']);
448push(@layers, ['XIP', 'Dependency']);
449push(@layers, ['XIP', 'Sentences']);
450
451
Nils Diewald98767bb2014-04-25 20:31:19 +0000452__END__