#!/usr/bin/env perl
use strict;
use warnings;
use utf8;

# The plan is declared as skip_all, so Test::More ends the script while
# processing this import; the assertions below only sketch the intended
# test until the feature is implemented.
use Test::More skip_all => 'Not yet implemented';
use lib 't/annotation';
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';

# Fixture directory: corpus/doc/0001, resolved relative to this test file.
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');

use_ok('KorAP::XML::Krill');

# The path is handed over with a trailing slash appended.
ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');

# Parse, tokenize and add the three annotation layers in a single chain;
# the assertion passes when the final call in the chain returns a true value.
ok($doc->parse
     ->tokenize
     ->annotate('Base', 'Sentences')
     ->annotate('Base', 'Paragraphs')
     ->annotate('DeReKo', 'Struct'), 'Annotate');

# Metadata
is($doc->text_sigle, 'Corpus_Doc.0001', 'ID-text');
is($doc->doc_sigle, 'Corpus_Doc', 'ID-doc');
is($doc->corpus_sigle, 'Corpus', 'ID-corpus');
is($doc->title, 'Beispiel Text', 'title');
# Distinct description so a failure here is not confused with the
# 'title' assertion above.
is($doc->sub_title, 'Beispiel Text Untertitel', 'sub_title');

# diag $doc->to_json;

done_testing;
__END__

{
  "@context" : "http://korap.ids-mannheim.de/ns/koral/0.4/context.jsonld",
  # Add krill context!
  "text" : {
    "@type" : "koral:corpus",
    "meta" : {
      "@type" : "koral:meta",
      "s_sigle" : "BSP",
      "s_id" : "BSP",
      "t_title" : "Der Name als Text",
      "k_keywords" : ["Some", "Keywords"],
      "d_date" : "2015-12-03"
    },
    "@value" : {
      "@type" : "koral:doc",
      "meta" : {
        "@type" : "koral:meta",
        "s_sigle" : "BSP/AAA",
        "s_id" : "AAA"
      },
      "@value" : {
        "@type" : "koral:text",
        "meta" : {
          "@type" : "koral:meta",
          "s_sigle" : "BSP/AAA/0001",
          "s_id" : "0001",
          "s_language" : "de"
        },
        "store" : {
          ...
        },
        "@value" : {
          "@type" : "krill:stream",
          "source" : "opennlp#tokens",
          "layer" : ["base/s=spans"],
          "primary" : "...",
          "name" : "tokens",
          "foundries": ["base","base/paragraphs","base/sentences"],
          "stream" : [[ ... ], [ ... ]]
        }
      }
    }
  }
}