| =pod |
| |
| =encoding utf8 |
| |
| =head1 NAME |
| |
| KorAP::XML::Krill - Preprocess KorAP-XML documents for Krill |
| |
| |
| =head1 SYNOPSIS |
| |
| # Create Converter Object |
| my $doc = KorAP::XML::Krill->new( |
| path => 'mydoc-1/' |
| ); |
| |
| # Convert to krill json |
| print $doc->parse->tokenize->annotate('Mate', 'Morpho')->to_json; |
| |
| |
| =head1 DESCRIPTION |
| |
| Parse the primary data and metadata of a KorAP-XML document. |
| |
| |
| =head1 ATTRIBUTES |
| |
| =head2 log |
| |
| L<Log::Log4perl> object for logging. |
| |
| =head2 path |
| |
| $doc->path("example-004/"); |
| print $doc->path; |
| |
| The path of the document. |
| |
| |
| =head2 primary |
| |
| print $doc->primary->data(0,20); |
| |
| The L<KorAP::XML::Document::Primary> object containing the primary data. |
| |
| |
| =head1 METHODS |
| |
| =head2 annotate |
| |
| $doc->annotate('Mate', 'Morpho'); |
| |
| Add an annotation layer to the conversion process. |
| |
| |
| =head2 parse |
| |
| $doc = $doc->parse; |
| |
| Run the metadata parsing process of the document. |
| |
| |
| =head2 tokenize |
| |
| $doc = $doc->tokenize('OpenNLP', 'Tokens'); |
| |
| Accept the tokenization based on a given foundry and a given layer. |
| |
| |
| =head1 AVAILABILITY |
| |
| L<https://github.com/KorAP/KorAP-XML-Krill> |
| |
| |
| =head1 COPYRIGHT AND LICENSE |
| |
| Copyright (C) 2015-2016, L<IDS Mannheim|http://www.ids-mannheim.de/> |
| |
| Author: L<Nils Diewald|http://nils-diewald.de/> |
| |
| KorAP::XML::Krill is developed as part of the |
| L<KorAP|http://korap.ids-mannheim.de/> |
| Corpus Analysis Platform at the |
| L<Institute for the German Language (IDS)|http://ids-mannheim.de/>, |
| member of the |
| L<Leibniz-Gemeinschaft|http://www.leibniz-gemeinschaft.de/en/about-us/leibniz-competition/projekte-2011/2011-funding-line-2/> |
| and supported by the L<KobRA|http://www.kobra.tu-dortmund.de> project, |
| funded by the |
| L<Federal Ministry of Education and Research (BMBF)|http://www.bmbf.de/en/>. |
| |
| KorAP::XML::Krill is free software published under the |
| L<BSD-2 License|https://raw.githubusercontent.com/KorAP/KorAP-XML-Krill/master/LICENSE>. |
| |
| =cut |