New structure is KorAP::XML::Krill
Change-Id: I42297512b99acca4ab011306d11c095641397af5
diff --git a/Changes b/Changes
index 4cf41ea..def9338 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.05 2016-01-28
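+ - Renamed KorAP::Document to KorAP::XML::Krill and moved the Index, Field, Tokenizer and Log modules into the KorAP::XML namespace (KorAP::Indexer removed).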
+
0.04 2016-01-28
- Added PTI to all payloads.
- Added support for empty elements.
diff --git a/Makefile.PL b/Makefile.PL
index 9818ad1..5034c5f 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -5,10 +5,10 @@
use ExtUtils::MakeMaker;
WriteMakefile(
- NAME => 'KorAP::Indexer',
+ NAME => 'KorAP::XML::Krill',
AUTHOR => 'Nils Diewald',
- ABSTRACT => 'Preprocessor for Krill Index preparation',
- VERSION_FROM => 'lib/KorAP/Indexer.pm',
+ ABSTRACT => 'Preprocess KorAP XML documents for Krill',
+ VERSION_FROM => 'lib/KorAP/XML/Krill.pm',
PREREQ_PM => {
'Mojolicious' => 6.11,
'Packed::Array' => 0.01,
diff --git a/lib/KorAP/Indexer.pm b/lib/KorAP/Indexer.pm
deleted file mode 100644
index a428437..0000000
--- a/lib/KorAP/Indexer.pm
+++ /dev/null
@@ -1,5 +0,0 @@
-package KorAP::Indexer;
-
-our $VERSION = 0.04;
-
-1;
diff --git a/lib/KorAP/Document/Primary.pm b/lib/KorAP/XML/Document/Primary.pm
similarity index 95%
rename from lib/KorAP/Document/Primary.pm
rename to lib/KorAP/XML/Document/Primary.pm
index 4acd66a..54ed099 100644
--- a/lib/KorAP/Document/Primary.pm
+++ b/lib/KorAP/XML/Document/Primary.pm
@@ -1,4 +1,4 @@
-package KorAP::Document::Primary;
+package KorAP::XML::Document::Primary;
use strict;
use warnings;
use Carp qw/croak carp/;
@@ -151,11 +151,11 @@
=head1 NAME
-KorAP::Document::Primary
+KorAP::XML::Document::Primary
=head1 SYNOPSIS
- my $text = KorAP::Document::Primary('Das ist mein Text');
+ my $text = KorAP::XML::Document::Primary->new('Das ist mein Text');
print $text->data(2,5);
print $text->data_length;
diff --git a/lib/KorAP/Field/MultiTerm.pm b/lib/KorAP/XML/Field/MultiTerm.pm
similarity index 98%
rename from lib/KorAP/Field/MultiTerm.pm
rename to lib/KorAP/XML/Field/MultiTerm.pm
index 835749d..731383a 100644
--- a/lib/KorAP/Field/MultiTerm.pm
+++ b/lib/KorAP/XML/Field/MultiTerm.pm
@@ -1,4 +1,4 @@
-package KorAP::Field::MultiTerm;
+package KorAP::XML::Field::MultiTerm;
use strict;
use warnings;
use MIME::Base64;
diff --git a/lib/KorAP/Field/MultiTermToken.pm b/lib/KorAP/XML/Field/MultiTermToken.pm
similarity index 95%
rename from lib/KorAP/Field/MultiTermToken.pm
rename to lib/KorAP/XML/Field/MultiTermToken.pm
index 4884e7f..82a87d2 100644
--- a/lib/KorAP/Field/MultiTermToken.pm
+++ b/lib/KorAP/XML/Field/MultiTermToken.pm
@@ -1,5 +1,5 @@
-package KorAP::Field::MultiTermToken;
-use KorAP::Field::MultiTerm;
+package KorAP::XML::Field::MultiTermToken;
+use KorAP::XML::Field::MultiTerm;
use List::MoreUtils 'uniq';
use Carp qw/carp croak/;
use strict;
@@ -17,10 +17,10 @@
my $mt;
unless (ref $_[0] eq 'MultiTerm') {
if (@_ == 1) {
- $mt = KorAP::Field::MultiTerm->new(term => $_[0]);
+ $mt = KorAP::XML::Field::MultiTerm->new(term => $_[0]);
}
else {
- $mt = KorAP::Field::MultiTerm->new(@_);
+ $mt = KorAP::XML::Field::MultiTerm->new(@_);
};
}
else {
diff --git a/lib/KorAP/Field/MultiTermTokenStream.pm b/lib/KorAP/XML/Field/MultiTermTokenStream.pm
similarity index 84%
rename from lib/KorAP/Field/MultiTermTokenStream.pm
rename to lib/KorAP/XML/Field/MultiTermTokenStream.pm
index 152bfa1..33eec80 100644
--- a/lib/KorAP/Field/MultiTermTokenStream.pm
+++ b/lib/KorAP/XML/Field/MultiTermTokenStream.pm
@@ -1,12 +1,12 @@
-package KorAP::Field::MultiTermTokenStream;
+package KorAP::XML::Field::MultiTermTokenStream;
use Mojo::Base -base;
-use KorAP::Field::MultiTermToken;
+use KorAP::XML::Field::MultiTermToken;
has [qw/oStart oEnd/];
sub add {
my $self = shift;
- my $mtt = shift // KorAP::Field::MultiTermToken->new;
+ my $mtt = shift // KorAP::XML::Field::MultiTermToken->new;
$self->{mtt} //= [];
$self->{tui} //= [];
push(@{$self->{mtt}}, $mtt);
diff --git a/lib/KorAP/Index/Base.pm b/lib/KorAP/XML/Index/Base.pm
similarity index 86%
rename from lib/KorAP/Index/Base.pm
rename to lib/KorAP/XML/Index/Base.pm
index 705950d..8daa8c0 100644
--- a/lib/KorAP/Index/Base.pm
+++ b/lib/KorAP/XML/Index/Base.pm
@@ -1,4 +1,4 @@
-package KorAP::Index::Base;
+package KorAP::XML::Index::Base;
use strict;
use warnings;
@@ -37,4 +37,4 @@
=pod
-=head1 KorAP::Index::Base
+=head1 KorAP::XML::Index::Base
diff --git a/lib/KorAP/Index/Base/Paragraphs.pm b/lib/KorAP/XML/Index/Base/Paragraphs.pm
similarity index 88%
rename from lib/KorAP/Index/Base/Paragraphs.pm
rename to lib/KorAP/XML/Index/Base/Paragraphs.pm
index 1cb2e99..caf0161 100644
--- a/lib/KorAP/Index/Base/Paragraphs.pm
+++ b/lib/KorAP/XML/Index/Base/Paragraphs.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Base::Paragraphs;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Base::Paragraphs;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/Base/Sentences.pm b/lib/KorAP/XML/Index/Base/Sentences.pm
similarity index 92%
rename from lib/KorAP/Index/Base/Sentences.pm
rename to lib/KorAP/XML/Index/Base/Sentences.pm
index 449f331..1ca71d7 100644
--- a/lib/KorAP/Index/Base/Sentences.pm
+++ b/lib/KorAP/XML/Index/Base/Sentences.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Base::Sentences;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Base::Sentences;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/Connexor/Morpho.pm b/lib/KorAP/XML/Index/Connexor/Morpho.pm
similarity index 95%
rename from lib/KorAP/Index/Connexor/Morpho.pm
rename to lib/KorAP/XML/Index/Connexor/Morpho.pm
index a6970f5..6aba416 100644
--- a/lib/KorAP/Index/Connexor/Morpho.pm
+++ b/lib/KorAP/XML/Index/Connexor/Morpho.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Connexor::Morpho;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Connexor::Morpho;
+use KorAP::XML::Index::Base;
our %MAP = (
'v_ind' => 'mood',
diff --git a/lib/KorAP/Index/Connexor/Phrase.pm b/lib/KorAP/XML/Index/Connexor/Phrase.pm
similarity index 89%
rename from lib/KorAP/Index/Connexor/Phrase.pm
rename to lib/KorAP/XML/Index/Connexor/Phrase.pm
index 309a52b..c3b257f 100644
--- a/lib/KorAP/Index/Connexor/Phrase.pm
+++ b/lib/KorAP/XML/Index/Connexor/Phrase.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Connexor::Phrase;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Connexor::Phrase;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/Connexor/Sentences.pm b/lib/KorAP/XML/Index/Connexor/Sentences.pm
similarity index 87%
rename from lib/KorAP/Index/Connexor/Sentences.pm
rename to lib/KorAP/XML/Index/Connexor/Sentences.pm
index 09246db..cc4d43c 100644
--- a/lib/KorAP/Index/Connexor/Sentences.pm
+++ b/lib/KorAP/XML/Index/Connexor/Sentences.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Connexor::Sentences;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Connexor::Sentences;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/Connexor/Syntax.pm b/lib/KorAP/XML/Index/Connexor/Syntax.pm
similarity index 87%
rename from lib/KorAP/Index/Connexor/Syntax.pm
rename to lib/KorAP/XML/Index/Connexor/Syntax.pm
index d27801f..758fe9e 100644
--- a/lib/KorAP/Index/Connexor/Syntax.pm
+++ b/lib/KorAP/XML/Index/Connexor/Syntax.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Connexor::Syntax;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Connexor::Syntax;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/CoreNLP/Constituency.pm b/lib/KorAP/XML/Index/CoreNLP/Constituency.pm
similarity index 96%
rename from lib/KorAP/Index/CoreNLP/Constituency.pm
rename to lib/KorAP/XML/Index/CoreNLP/Constituency.pm
index 2e21565..48f34b8 100644
--- a/lib/KorAP/Index/CoreNLP/Constituency.pm
+++ b/lib/KorAP/XML/Index/CoreNLP/Constituency.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::CoreNLP::Constituency;
-use KorAP::Index::Base;
+package KorAP::XML::Index::CoreNLP::Constituency;
+use KorAP::XML::Index::Base;
use Set::Scalar;
sub parse {
diff --git a/lib/KorAP/Index/CoreNLP/Morpho.pm b/lib/KorAP/XML/Index/CoreNLP/Morpho.pm
similarity index 87%
rename from lib/KorAP/Index/CoreNLP/Morpho.pm
rename to lib/KorAP/XML/Index/CoreNLP/Morpho.pm
index 76cee0b..2ab3301 100644
--- a/lib/KorAP/Index/CoreNLP/Morpho.pm
+++ b/lib/KorAP/XML/Index/CoreNLP/Morpho.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::CoreNLP::Morpho;
-use KorAP::Index::Base;
+package KorAP::XML::Index::CoreNLP::Morpho;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/CoreNLP/NamedEntities.pm b/lib/KorAP/XML/Index/CoreNLP/NamedEntities.pm
similarity index 91%
rename from lib/KorAP/Index/CoreNLP/NamedEntities.pm
rename to lib/KorAP/XML/Index/CoreNLP/NamedEntities.pm
index 5308f83..b5ef3cc 100644
--- a/lib/KorAP/Index/CoreNLP/NamedEntities.pm
+++ b/lib/KorAP/XML/Index/CoreNLP/NamedEntities.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::CoreNLP::NamedEntities;
-use KorAP::Index::Base;
+package KorAP::XML::Index::CoreNLP::NamedEntities;
+use KorAP::XML::Index::Base;
# Import named entities, potentially with a specified
# Model. However - now all models are mapped to the 'ne'-Prefix
diff --git a/lib/KorAP/Index/CoreNLP/Sentences.pm b/lib/KorAP/XML/Index/CoreNLP/Sentences.pm
similarity index 88%
rename from lib/KorAP/Index/CoreNLP/Sentences.pm
rename to lib/KorAP/XML/Index/CoreNLP/Sentences.pm
index abe97d6..4384aee 100644
--- a/lib/KorAP/Index/CoreNLP/Sentences.pm
+++ b/lib/KorAP/XML/Index/CoreNLP/Sentences.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::CoreNLP::Sentences;
-use KorAP::Index::Base;
+package KorAP::XML::Index::CoreNLP::Sentences;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/DeReKo/Structure.pm b/lib/KorAP/XML/Index/DeReKo/Structure.pm
similarity index 95%
rename from lib/KorAP/Index/DeReKo/Structure.pm
rename to lib/KorAP/XML/Index/DeReKo/Structure.pm
index daa20b4..dfb9e62 100644
--- a/lib/KorAP/Index/DeReKo/Structure.pm
+++ b/lib/KorAP/XML/Index/DeReKo/Structure.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::DeReKo::Structure;
-use KorAP::Index::Base;
+package KorAP::XML::Index::DeReKo::Structure;
+use KorAP::XML::Index::Base;
use Data::Dumper;
sub parse {
diff --git a/lib/KorAP/Index/Glemm/Morpho.pm b/lib/KorAP/XML/Index/Glemm/Morpho.pm
similarity index 93%
rename from lib/KorAP/Index/Glemm/Morpho.pm
rename to lib/KorAP/XML/Index/Glemm/Morpho.pm
index b0bc589..80f75e0 100644
--- a/lib/KorAP/Index/Glemm/Morpho.pm
+++ b/lib/KorAP/XML/Index/Glemm/Morpho.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Glemm::Morpho;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Glemm::Morpho;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/Malt/Dependency.pm b/lib/KorAP/XML/Index/Malt/Dependency.pm
similarity index 85%
rename from lib/KorAP/Index/Malt/Dependency.pm
rename to lib/KorAP/XML/Index/Malt/Dependency.pm
index 1f55824..7765c1a 100644
--- a/lib/KorAP/Index/Malt/Dependency.pm
+++ b/lib/KorAP/XML/Index/Malt/Dependency.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Malt::Dependency;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Malt::Dependency;
+use KorAP::XML::Index::Base;
use Data::Dumper;
sub parse {
diff --git a/lib/KorAP/Index/Mate/Dependency.pm b/lib/KorAP/XML/Index/Mate/Dependency.pm
similarity index 93%
rename from lib/KorAP/Index/Mate/Dependency.pm
rename to lib/KorAP/XML/Index/Mate/Dependency.pm
index 4b97261..c9d3393 100644
--- a/lib/KorAP/Index/Mate/Dependency.pm
+++ b/lib/KorAP/XML/Index/Mate/Dependency.pm
@@ -1,6 +1,5 @@
-package KorAP::Index::Mate::Dependency;
-use KorAP::Index::Base;
-use Data::Dumper;
+package KorAP::XML::Index::Mate::Dependency;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/Mate/Morpho.pm b/lib/KorAP/XML/Index/Mate/Morpho.pm
similarity index 93%
rename from lib/KorAP/Index/Mate/Morpho.pm
rename to lib/KorAP/XML/Index/Mate/Morpho.pm
index 1a06f63..f62465f 100644
--- a/lib/KorAP/Index/Mate/Morpho.pm
+++ b/lib/KorAP/XML/Index/Mate/Morpho.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Mate::Morpho;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Mate::Morpho;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/Mate/MorphoAttr.pm b/lib/KorAP/XML/Index/Mate/MorphoAttr.pm
similarity index 94%
rename from lib/KorAP/Index/Mate/MorphoAttr.pm
rename to lib/KorAP/XML/Index/Mate/MorphoAttr.pm
index 74ba26b..f937c5f 100644
--- a/lib/KorAP/Index/Mate/MorphoAttr.pm
+++ b/lib/KorAP/XML/Index/Mate/MorphoAttr.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Mate::MorphoAttr;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Mate::MorphoAttr;
+use KorAP::XML::Index::Base;
# This attaches morphological information as attributes to the pos
diff --git a/lib/KorAP/Index/OpenNLP/Morpho.pm b/lib/KorAP/XML/Index/OpenNLP/Morpho.pm
similarity index 88%
rename from lib/KorAP/Index/OpenNLP/Morpho.pm
rename to lib/KorAP/XML/Index/OpenNLP/Morpho.pm
index de8c620..a59d6c0 100644
--- a/lib/KorAP/Index/OpenNLP/Morpho.pm
+++ b/lib/KorAP/XML/Index/OpenNLP/Morpho.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::OpenNLP::Morpho;
-use KorAP::Index::Base;
+package KorAP::XML::Index::OpenNLP::Morpho;
+use KorAP::XML::Index::Base;
use Scalar::Util 'weaken';
sub parse {
diff --git a/lib/KorAP/Index/OpenNLP/Sentences.pm b/lib/KorAP/XML/Index/OpenNLP/Sentences.pm
similarity index 87%
rename from lib/KorAP/Index/OpenNLP/Sentences.pm
rename to lib/KorAP/XML/Index/OpenNLP/Sentences.pm
index 7f1d8d5..f2f60f9 100644
--- a/lib/KorAP/Index/OpenNLP/Sentences.pm
+++ b/lib/KorAP/XML/Index/OpenNLP/Sentences.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::OpenNLP::Sentences;
-use KorAP::Index::Base;
+package KorAP::XML::Index::OpenNLP::Sentences;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/Schreibgebrauch/Lemma.pm b/lib/KorAP/XML/Index/Schreibgebrauch/Lemma.pm
similarity index 91%
rename from lib/KorAP/Index/Schreibgebrauch/Lemma.pm
rename to lib/KorAP/XML/Index/Schreibgebrauch/Lemma.pm
index 5bd01d5..fb2fee4 100644
--- a/lib/KorAP/Index/Schreibgebrauch/Lemma.pm
+++ b/lib/KorAP/XML/Index/Schreibgebrauch/Lemma.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Schreibgebrauch::Lemma;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Schreibgebrauch::Lemma;
+use KorAP::XML::Index::Base;
use Mojo::ByteStream 'b';
sub parse {
diff --git a/lib/KorAP/Index/Schreibgebrauch/Morpho.pm b/lib/KorAP/XML/Index/Schreibgebrauch/Morpho.pm
similarity index 89%
rename from lib/KorAP/Index/Schreibgebrauch/Morpho.pm
rename to lib/KorAP/XML/Index/Schreibgebrauch/Morpho.pm
index fc3fa8d..eb49f85 100644
--- a/lib/KorAP/Index/Schreibgebrauch/Morpho.pm
+++ b/lib/KorAP/XML/Index/Schreibgebrauch/Morpho.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::Schreibgebrauch::Morpho;
-use KorAP::Index::Base;
+package KorAP::XML::Index::Schreibgebrauch::Morpho;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/TreeTagger/Morpho.pm b/lib/KorAP/XML/Index/TreeTagger/Morpho.pm
similarity index 94%
rename from lib/KorAP/Index/TreeTagger/Morpho.pm
rename to lib/KorAP/XML/Index/TreeTagger/Morpho.pm
index 487b78a..5bde977 100644
--- a/lib/KorAP/Index/TreeTagger/Morpho.pm
+++ b/lib/KorAP/XML/Index/TreeTagger/Morpho.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::TreeTagger::Morpho;
-use KorAP::Index::Base;
+package KorAP::XML::Index::TreeTagger::Morpho;
+use KorAP::XML::Index::Base;
use POSIX 'floor';
sub parse {
diff --git a/lib/KorAP/Index/TreeTagger/Sentences.pm b/lib/KorAP/XML/Index/TreeTagger/Sentences.pm
similarity index 87%
rename from lib/KorAP/Index/TreeTagger/Sentences.pm
rename to lib/KorAP/XML/Index/TreeTagger/Sentences.pm
index 1d62d9f..9297817 100644
--- a/lib/KorAP/Index/TreeTagger/Sentences.pm
+++ b/lib/KorAP/XML/Index/TreeTagger/Sentences.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::TreeTagger::Sentences;
-use KorAP::Index::Base;
+package KorAP::XML::Index::TreeTagger::Sentences;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/XIP/Constituency.pm b/lib/KorAP/XML/Index/XIP/Constituency.pm
similarity index 97%
rename from lib/KorAP/Index/XIP/Constituency.pm
rename to lib/KorAP/XML/Index/XIP/Constituency.pm
index 7e2853e..81c7db0 100644
--- a/lib/KorAP/Index/XIP/Constituency.pm
+++ b/lib/KorAP/XML/Index/XIP/Constituency.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::XIP::Constituency;
-use KorAP::Index::Base;
+package KorAP::XML::Index::XIP::Constituency;
+use KorAP::XML::Index::Base;
use Set::Scalar;
use Scalar::Util qw/weaken/;
diff --git a/lib/KorAP/Index/XIP/Dependency.pm b/lib/KorAP/XML/Index/XIP/Dependency.pm
similarity index 94%
rename from lib/KorAP/Index/XIP/Dependency.pm
rename to lib/KorAP/XML/Index/XIP/Dependency.pm
index c112348..a83f911 100644
--- a/lib/KorAP/Index/XIP/Dependency.pm
+++ b/lib/KorAP/XML/Index/XIP/Dependency.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::XIP::Dependency;
-use KorAP::Index::Base;
+package KorAP::XML::Index::XIP::Dependency;
+use KorAP::XML::Index::Base;
# > source to target
# < target to source
diff --git a/lib/KorAP/Index/XIP/Morpho.pm b/lib/KorAP/XML/Index/XIP/Morpho.pm
similarity index 93%
rename from lib/KorAP/Index/XIP/Morpho.pm
rename to lib/KorAP/XML/Index/XIP/Morpho.pm
index 2c82ba7..74bb3ea 100644
--- a/lib/KorAP/Index/XIP/Morpho.pm
+++ b/lib/KorAP/XML/Index/XIP/Morpho.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::XIP::Morpho;
-use KorAP::Index::Base;
+package KorAP::XML::Index::XIP::Morpho;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Index/XIP/Sentences.pm b/lib/KorAP/XML/Index/XIP/Sentences.pm
similarity index 88%
rename from lib/KorAP/Index/XIP/Sentences.pm
rename to lib/KorAP/XML/Index/XIP/Sentences.pm
index 0273b39..1eda81e 100644
--- a/lib/KorAP/Index/XIP/Sentences.pm
+++ b/lib/KorAP/XML/Index/XIP/Sentences.pm
@@ -1,5 +1,5 @@
-package KorAP::Index::XIP::Sentences;
-use KorAP::Index::Base;
+package KorAP::XML::Index::XIP::Sentences;
+use KorAP::XML::Index::Base;
sub parse {
my $self = shift;
diff --git a/lib/KorAP/Document.pm b/lib/KorAP/XML/Krill.pm
similarity index 97%
rename from lib/KorAP/Document.pm
rename to lib/KorAP/XML/Krill.pm
index 8bf9fe7..98d2866 100644
--- a/lib/KorAP/Document.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -1,13 +1,13 @@
-package KorAP::Document;
+package KorAP::XML::Krill;
use Mojo::Base -base;
use Mojo::ByteStream 'b';
use Mojo::Util qw/encode/;
use XML::Fast;
use Try::Tiny;
use Carp qw/croak/;
-use KorAP::Document::Primary;
+use KorAP::XML::Document::Primary;
use Log::Log4perl;
-use KorAP::Log;
+use KorAP::XML::Log;
use Mojo::DOM;
use Data::Dumper;
use File::Spec::Functions qw/catdir catfile catpath splitdir splitpath rel2abs/;
@@ -16,6 +16,8 @@
# Due to the kind of processing, processed metadata may be stored in
# a multiprocess cache instead.
+our $VERSION = '0.05';
+
our @ATTR = qw/text_sigle
doc_sigle
corpus_sigle
@@ -66,7 +68,7 @@
if(Log::Log4perl->initialized()) {
state $log = Log::Log4perl->get_logger(__PACKAGE__);
};
- state $log = KorAP::Log->new;
+ state $log = KorAP::XML::Log->new;
return $log;
};
@@ -135,7 +137,7 @@
# Get primary data
my $pd = $rt->{text};
if ($pd) {
- $self->{pd} = KorAP::Document::Primary->new($pd);
+ $self->{pd} = KorAP::XML::Document::Primary->new($pd);
}
else {
croak $unable;
@@ -635,12 +637,12 @@
=head1 NAME
-KorAP::Document
+KorAP::XML::Krill
=head1 SYNOPSIS
- my $doc = KorAP::Document->new(
+ my $doc = KorAP::XML::Krill->new(
path => 'mydoc-1/'
);
@@ -725,7 +727,7 @@
print $doc->primary->data(0,20);
-The L<KorAP::Document::Primary> object containing the primary data.
+The L<KorAP::XML::Document::Primary> object containing the primary data.
=head2 author
diff --git a/lib/KorAP/Log.pm b/lib/KorAP/XML/Log.pm
similarity index 87%
rename from lib/KorAP/Log.pm
rename to lib/KorAP/XML/Log.pm
index bcd8a55..6a06511 100644
--- a/lib/KorAP/Log.pm
+++ b/lib/KorAP/XML/Log.pm
@@ -1,4 +1,4 @@
-package KorAP::Log;
+package KorAP::XML::Log;
use Mojo::Base -base;
use Carp;
diff --git a/lib/KorAP/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
similarity index 91%
rename from lib/KorAP/Tokenizer.pm
rename to lib/KorAP/XML/Tokenizer.pm
index 7e26f32..141cf68 100644
--- a/lib/KorAP/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -1,15 +1,15 @@
-package KorAP::Tokenizer;
+package KorAP::XML::Tokenizer;
use Mojo::Base -base;
use Mojo::ByteStream 'b';
use XML::Fast;
use Try::Tiny;
use Carp qw/croak/;
use Scalar::Util qw/weaken/;
-use KorAP::Tokenizer::Range;
-use KorAP::Tokenizer::Match;
-use KorAP::Tokenizer::Spans;
-use KorAP::Tokenizer::Tokens;
-use KorAP::Field::MultiTermTokenStream;
+use KorAP::XML::Tokenizer::Range;
+use KorAP::XML::Tokenizer::Match;
+use KorAP::XML::Tokenizer::Spans;
+use KorAP::XML::Tokenizer::Tokens;
+use KorAP::XML::Field::MultiTermTokenStream;
use List::MoreUtils 'uniq';
use JSON::XS;
use Log::Log4perl;
@@ -35,7 +35,7 @@
if(Log::Log4perl->initialized()) {
state $log = Log::Log4perl->get_logger(__PACKAGE__);
};
- state $log = KorAP::Log->new;
+ state $log = KorAP::XML::Log->new;
return $log;
};
@@ -44,7 +44,7 @@
my $self = shift;
# Create new token stream
- my $mtts = KorAP::Field::MultiTermTokenStream->new;
+ my $mtts = KorAP::XML::Field::MultiTermTokenStream->new;
my $path = $self->path . lc($self->foundry) . '/' . lc($self->layer) . '.xml';
unless (-e $path) {
@@ -61,8 +61,8 @@
my ($should, $have) = (0, 0);
# Create range and match objects
- my $range = KorAP::Tokenizer::Range->new;
- my $match = KorAP::Tokenizer::Match->new;
+ my $range = KorAP::XML::Tokenizer::Range->new;
+ my $match = KorAP::XML::Tokenizer::Match->new;
my $old = 0;
@@ -253,13 +253,13 @@
# Get span positions through character offsets
sub range {
- return shift->{range} // KorAP::Tokenizer::Range->new;
+ return shift->{range} // KorAP::XML::Tokenizer::Range->new;
};
# Get token positions through character offsets
sub match {
- return shift->{match} // KorAP::Tokenizer::Match->new;
+ return shift->{match} // KorAP::XML::Tokenizer::Match->new;
};
@@ -280,7 +280,7 @@
$param{primary} = $self->doc->primary;
- my $spans = KorAP::Tokenizer::Spans->new(
+ my $spans = KorAP::XML::Tokenizer::Spans->new(
path => $self->path,
range => $self->range,
match => $self->match,
@@ -323,7 +323,7 @@
$param{primary} = $self->doc->primary;
- my $tokens = KorAP::Tokenizer::Tokens->new(
+ my $tokens = KorAP::XML::Tokenizer::Tokens->new(
path => $self->path,
range => $self->range,
match => $self->match,
@@ -366,7 +366,7 @@
return;
};
- my $mod = 'KorAP::Index::' . $foundry . '::' . $layer;
+ my $mod = 'KorAP::XML::Index::' . $foundry . '::' . $layer;
if ($mod->can('new') || eval("require $mod; 1;")) {
if (my $retval = $mod->new($self)->parse(@_)) {
@@ -547,13 +547,13 @@
=head1 NAME
-KorAP::Tokenizer
+KorAP::XML::Tokenizer
=head1 SYNOPSIS
- my $tokens = KorAP::Tokenizer->new(
+ my $tokens = KorAP::XML::Tokenizer->new(
path => '../examples/00003',
- doc => KorAP::Document->new( ... ),
+ doc => KorAP::XML::Krill->new( ... ),
foundry => 'opennlp',
layer => 'tokens'
);
@@ -599,27 +599,27 @@
print $tokens->doc->corpus_id;
-The L<KorAP::Document> object.
+The L<KorAP::XML::Krill> object.
=head2 stream
$tokens->stream->add_meta('adjCount', '<i>45');
-The L<KorAP::Field::MultiTermTokenStream> object
+The L<KorAP::XML::Field::MultiTermTokenStream> object
=head2 range
$tokens->range->lookup(45);
-The L<KorAP::Tokenizer::Range> object for converting span offsets to positions.
+The L<KorAP::XML::Tokenizer::Range> object for converting span offsets to positions.
=head2 match
$tokens->match->lookup(45);
-The L<KorAP::Tokenizer::Match> object for converting token offsets to positions.
+The L<KorAP::XML::Tokenizer::Match> object for converting token offsets to positions.
=head1 METHODS
@@ -683,8 +683,8 @@
Add span information to the parsed token stream.
Expects a C<foundry> name, a C<layer> name and a
callback parameter, that will be called after each parsed
-span. The L<KorAP::Field::MultiTermTokenStream> object will be passed,
-as well as the current L<KorAP::Tokenizer::Span>.
+span. The L<KorAP::XML::Field::MultiTermTokenStream> object will be passed,
+as well as the current L<KorAP::XML::Tokenizer::Span>.
An optional parameter C<encoding> may indicate that the span offsets
are either refering to C<bytes> or C<utf-8> offsets.
@@ -713,8 +713,8 @@
Add token information to the parsed token stream.
Expects a C<foundry> name, a C<layer> name and a
callback parameter, that will be called after each parsed
-token. The L<KorAP::Field::MultiTermTokenStream> object will be passed,
-as well as the current L<KorAP::Tokenizer::Span>.
+token. The L<KorAP::XML::Field::MultiTermTokenStream> object will be passed,
+as well as the current L<KorAP::XML::Tokenizer::Token>.
An optional parameter C<encoding> may indicate that the token offsets
are either refering to C<bytes> or C<utf-8> offsets.
diff --git a/lib/KorAP/Tokenizer/Match.pm b/lib/KorAP/XML/Tokenizer/Match.pm
similarity index 92%
rename from lib/KorAP/Tokenizer/Match.pm
rename to lib/KorAP/XML/Tokenizer/Match.pm
index eba2b09..1a9f1a6 100644
--- a/lib/KorAP/Tokenizer/Match.pm
+++ b/lib/KorAP/XML/Tokenizer/Match.pm
@@ -1,4 +1,4 @@
-package KorAP::Tokenizer::Match;
+package KorAP::XML::Tokenizer::Match;
use strict;
use warnings;
diff --git a/lib/KorAP/Tokenizer/Range.pm b/lib/KorAP/XML/Tokenizer/Range.pm
similarity index 98%
rename from lib/KorAP/Tokenizer/Range.pm
rename to lib/KorAP/XML/Tokenizer/Range.pm
index 762653b..3116e5e 100644
--- a/lib/KorAP/Tokenizer/Range.pm
+++ b/lib/KorAP/XML/Tokenizer/Range.pm
@@ -1,4 +1,4 @@
-package KorAP::Tokenizer::Range;
+package KorAP::XML::Tokenizer::Range;
use strict;
use warnings;
use Array::IntSpan;
diff --git a/lib/KorAP/Tokenizer/Span.pm b/lib/KorAP/XML/Tokenizer/Span.pm
similarity index 96%
rename from lib/KorAP/Tokenizer/Span.pm
rename to lib/KorAP/XML/Tokenizer/Span.pm
index 22715bf..9e34aec 100644
--- a/lib/KorAP/Tokenizer/Span.pm
+++ b/lib/KorAP/XML/Tokenizer/Span.pm
@@ -1,4 +1,4 @@
-package KorAP::Tokenizer::Span;
+package KorAP::XML::Tokenizer::Span;
use strict;
use warnings;
use Mojo::DOM;
diff --git a/lib/KorAP/Tokenizer/Spans.pm b/lib/KorAP/XML/Tokenizer/Spans.pm
similarity index 87%
rename from lib/KorAP/Tokenizer/Spans.pm
rename to lib/KorAP/XML/Tokenizer/Spans.pm
index eaf1549..10ba474 100644
--- a/lib/KorAP/Tokenizer/Spans.pm
+++ b/lib/KorAP/XML/Tokenizer/Spans.pm
@@ -1,10 +1,10 @@
-package KorAP::Tokenizer::Spans;
+package KorAP::XML::Tokenizer::Spans;
use strict;
use warnings;
-use KorAP::Log;
+use KorAP::XML::Log;
use Data::Dumper;
-use Mojo::Base 'KorAP::Tokenizer::Units';
-use KorAP::Tokenizer::Span;
+use Mojo::Base 'KorAP::XML::Tokenizer::Units';
+use KorAP::XML::Tokenizer::Span;
use Mojo::ByteStream 'b';
use XML::Fast;
use Try::Tiny;
@@ -15,7 +15,7 @@
if(Log::Log4perl->initialized()) {
state $log = Log::Log4perl->get_logger(__PACKAGE__);
};
- state $log = KorAP::Log->new;
+ state $log = KorAP::XML::Log->new;
return $log;
};
diff --git a/lib/KorAP/Tokenizer/Token.pm b/lib/KorAP/XML/Tokenizer/Token.pm
similarity index 95%
rename from lib/KorAP/Tokenizer/Token.pm
rename to lib/KorAP/XML/Tokenizer/Token.pm
index a2ac486..36334db 100644
--- a/lib/KorAP/Tokenizer/Token.pm
+++ b/lib/KorAP/XML/Tokenizer/Token.pm
@@ -1,4 +1,4 @@
-package KorAP::Tokenizer::Token;
+package KorAP::XML::Tokenizer::Token;
use strict;
use warnings;
use Mojo::DOM;
diff --git a/lib/KorAP/Tokenizer/Tokens.pm b/lib/KorAP/XML/Tokenizer/Tokens.pm
similarity index 92%
rename from lib/KorAP/Tokenizer/Tokens.pm
rename to lib/KorAP/XML/Tokenizer/Tokens.pm
index b225c08..50d62fa 100644
--- a/lib/KorAP/Tokenizer/Tokens.pm
+++ b/lib/KorAP/XML/Tokenizer/Tokens.pm
@@ -1,7 +1,7 @@
-package KorAP::Tokenizer::Tokens;
-use Mojo::Base 'KorAP::Tokenizer::Units';
+package KorAP::XML::Tokenizer::Tokens;
+use Mojo::Base 'KorAP::XML::Tokenizer::Units';
use Mojo::ByteStream 'b';
-use KorAP::Tokenizer::Token;
+use KorAP::XML::Tokenizer::Token;
use Carp qw/croak carp/;
use XML::Fast;
use Try::Tiny;
diff --git a/lib/KorAP/Tokenizer/Units.pm b/lib/KorAP/XML/Tokenizer/Units.pm
similarity index 90%
rename from lib/KorAP/Tokenizer/Units.pm
rename to lib/KorAP/XML/Tokenizer/Units.pm
index 085a9fa..87f44d5 100644
--- a/lib/KorAP/Tokenizer/Units.pm
+++ b/lib/KorAP/XML/Tokenizer/Units.pm
@@ -1,6 +1,6 @@
-package KorAP::Tokenizer::Units;
-use KorAP::Tokenizer::Span;
-use KorAP::Tokenizer::Token;
+package KorAP::XML::Tokenizer::Units;
+use KorAP::XML::Tokenizer::Span;
+use KorAP::XML::Tokenizer::Token;
use Mojo::Base -base;
has [qw/path foundry layer match range primary/];
@@ -21,7 +21,7 @@
# The span is invalid
return unless $from <= $to;
- my $span = KorAP::Tokenizer::Span->new;
+ my $span = KorAP::XML::Tokenizer::Span->new;
# The span is a milestone
if ($from == $to) {
@@ -79,7 +79,7 @@
return unless defined $pos;
- my $token = KorAP::Tokenizer::Token->new;
+ my $token = KorAP::XML::Tokenizer::Token->new;
$token->id($s->{-id}) if $s && $s->{-id};
$token->pos($pos);
diff --git a/t/index/TestInit.pm b/t/index/TestInit.pm
index f740cad..b03dad2 100644
--- a/t/index/TestInit.pm
+++ b/t/index/TestInit.pm
@@ -3,20 +3,20 @@
use warnings;
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
sub tokens {
my $file = shift;
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', $file);
- my $doc = KorAP::Document->new(
+ my $doc = KorAP::XML::Krill->new(
path => $path . '/'
) or return;
$doc->parse;
- my $tokens = KorAP::Tokenizer->new(
+ my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'OpenNLP',
diff --git a/t/index/connexor_sentences.t b/t/index/connexor_sentences.t
index ab9630f..83a2fb6 100644
--- a/t/index/connexor_sentences.t
+++ b/t/index/connexor_sentences.t
@@ -6,14 +6,14 @@
use Scalar::Util qw/weaken/;
use Data::Dumper;
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Load Korap::Document');
@@ -23,9 +23,9 @@
ok($doc->primary->data, 'Primary data in existence');
is($doc->primary->data_length, 129, 'Data length');
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'OpenNLP',
diff --git a/t/index/meta.t b/t/index/meta.t
index e5a01ba..44ee526 100644
--- a/t/index/meta.t
+++ b/t/index/meta.t
@@ -13,7 +13,7 @@
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
like($doc->path, qr!$path/!, 'Path');
diff --git a/t/index/primary.t b/t/index/primary.t
index 7abf629..e42453c 100644
--- a/t/index/primary.t
+++ b/t/index/primary.t
@@ -6,14 +6,14 @@
use Scalar::Util qw/weaken/;
use Data::Dumper;
use lib 't/index';
-use TestInit;
+use KorAP::XML::Krill;
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
like($doc->path, qr!$path/!, 'Path');
diff --git a/t/meta.t b/t/meta.t
index a51371a..51eb2be 100644
--- a/t/meta.t
+++ b/t/meta.t
@@ -8,14 +8,14 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
# WPD/00001
my $path = catdir(dirname(__FILE__), 'corpus/WPD/00001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
like($doc->path, qr!$path/!, 'Path');
-ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
like($doc->path, qr!$path/$!, 'Path');
ok($doc->parse, 'Parse document');
@@ -59,7 +59,7 @@
# BRZ13/00001
$path = catdir(dirname(__FILE__), 'corpus/BRZ13/00001');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->title, 'Sexueller Missbrauch –„Das schreiende Kind steckt noch tief in mir“', 'title');
@@ -88,7 +88,7 @@
# A01/13047
$path = catdir(dirname(__FILE__), 'corpus/A01/13047');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->title, 'Fischer und Kolp im Sonnenhügel', 'title');
@@ -115,7 +115,7 @@
# ERL/0001
$path = catdir(dirname(__FILE__), 'corpus/ERL/00001');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->title, 'Amtsblatt des Landesbezirks Baden [diverse Erlasse]', 'title'); # Amtsblatt des Landesbezirks Baden [diverse Erlasse]
@@ -149,7 +149,7 @@
# A01/02035-substring
$path = catdir(dirname(__FILE__), 'corpus/A00/02035-substring');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->title, 'St. Galler Tagblatt, 11.01.2000, Ressort: TB-RSP (Abk.)', 'title'); # A00/JAN.02035
@@ -176,7 +176,7 @@
# A01/02873-meta
$path = catdir(dirname(__FILE__), 'corpus/A00/02873-meta');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->title, 'Tradition und Moderne', 'title');
@@ -205,7 +205,7 @@
# A01/05663-unbalanced
$path = catdir(dirname(__FILE__), 'corpus/A00/05663-unbalanced');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->title, 'Mehr Arbeitslose im Dezember', 'title');
@@ -234,7 +234,7 @@
# A01/07452-deep
$path = catdir(dirname(__FILE__), 'corpus/A00/07452-deep');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->title, 'Wil im Dezember 1999', 'title');
@@ -262,10 +262,10 @@
# ART
$path = catdir(dirname(__FILE__), 'corpus/artificial');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
#is($doc->path, $path . '/', 'Path');
-ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::XML::Krill');
#is($doc->path, $path . '/', 'Path');
ok($doc->parse, 'Parse document');
@@ -299,10 +299,10 @@
# Multipath headers
$path = catdir(dirname(__FILE__), 'corpus/VDI/JAN/00001');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
like($doc->path, qr!$path/!, 'Path');
-ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::XML::Krill');
like($doc->path, qr!$path/$!, 'Path');
ok($doc->parse, 'Parse document');
@@ -352,7 +352,7 @@
# WDD
$path = catdir(dirname(__FILE__), 'corpus/WDD/G27/38989');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
like($doc->path, qr!$path/!, 'Path');
ok($doc->parse, 'Parse document');
diff --git a/t/range.t b/t/range.t
index bd2538f..27c7970 100644
--- a/t/range.t
+++ b/t/range.t
@@ -3,9 +3,9 @@
use warnings;
use Test::More;
-use_ok('KorAP::Tokenizer::Range');
+use_ok('KorAP::XML::Tokenizer::Range');
-my $range = KorAP::Tokenizer::Range->new;
+my $range = KorAP::XML::Tokenizer::Range->new;
# Set a gap from 0 to 2, refering to position 0
$range->gap(0, 2, 0);
diff --git a/t/real/bzk.t b/t/real/bzk.t
index 3b39b25..acaf668 100644
--- a/t/real/bzk.t
+++ b/t/real/bzk.t
@@ -1,6 +1,3 @@
-#!/usr/bin/env perl
-# source ~/perl5/perlbrew/etc/bashrc
-# perlbrew switch perl-blead@korap
use strict;
use warnings;
use Test::More;
@@ -17,11 +14,11 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
my $path = catdir(dirname(__FILE__), '../corpus/BZK/D59/00001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->text_sigle, 'BZK_D59.00001', 'Correct text sigle');
@@ -69,12 +66,12 @@
ok(!$doc->doc_editor, 'Correct doc editor');
# Tokenization
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
# Get tokenization
-my $tokens = KorAP::Tokenizer->new(
+my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => $token_base_foundry,
diff --git a/t/real/bzk_2.t b/t/real/bzk_2.t
index 62a1a34..e4995c9 100644
--- a/t/real/bzk_2.t
+++ b/t/real/bzk_2.t
@@ -14,11 +14,11 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
my $path = catdir(dirname(__FILE__), '../corpus/BZK/D59/00089');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->text_sigle, 'BZK_D59.00089', 'Correct text sigle');
@@ -67,12 +67,12 @@
ok(!$doc->doc_editor, 'Correct doc editor');
# Tokenization
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
# Get tokenization
-my $tokens = KorAP::Tokenizer->new(
+my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => $token_base_foundry,
diff --git a/t/real/goethe.t b/t/real/goethe.t
index ae33e8d..84f239f 100644
--- a/t/real/goethe.t
+++ b/t/real/goethe.t
@@ -14,13 +14,13 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
# GOE/AGA/03828
my $path = catdir(dirname(__FILE__), '../corpus/GOE/AGA/03828');
# my $path = '/home/ndiewald/Repositories/korap/KorAP-sandbox/KorAP-lucene-indexer/t/GOE/AGA/03828';
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->text_sigle, 'GOE_AGA.03828', 'Correct text sigle');
@@ -63,12 +63,12 @@
ok(!$doc->doc_editor, 'Correct Doc editor');
# Tokenization
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
# Get tokenization
-my $tokens = KorAP::Tokenizer->new(
+my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => $token_base_foundry,
diff --git a/t/real/wdd.t b/t/real/wdd.t
index 4ed33d5..30059cd 100644
--- a/t/real/wdd.t
+++ b/t/real/wdd.t
@@ -1,6 +1,3 @@
-#!/usr/bin/env perl
-# source ~/perl5/perlbrew/etc/bashrc
-# perlbrew switch perl-blead@korap
use strict;
use warnings;
use Test::More;
@@ -17,12 +14,12 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
# GOE/AGA/03828
my $path = catdir(dirname(__FILE__), '../corpus/WDD/G27/38989');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->text_sigle, 'WDD11_G27.38989', 'Correct text sigle');
@@ -61,12 +58,12 @@
ok(!$doc->doc_editor, 'Correct doc editor');
# Tokenization
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
# Get tokenization
-my $tokens = KorAP::Tokenizer->new(
+my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => $token_base_foundry,
diff --git a/t/sgbr/sgbr_lemma.t b/t/sgbr/sgbr_lemma.t
index f9d6a0f..27fe073 100644
--- a/t/sgbr/sgbr_lemma.t
+++ b/t/sgbr/sgbr_lemma.t
@@ -4,19 +4,19 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
use utf8;
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Create Document');
ok($doc->parse, 'Parse document');
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'Sgbr',
diff --git a/t/sgbr/sgbr_meta.t b/t/sgbr/sgbr_meta.t
index 50891f8..53a096f 100644
--- a/t/sgbr/sgbr_meta.t
+++ b/t/sgbr/sgbr_meta.t
@@ -4,13 +4,13 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
use utf8;
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Create Document');
diff --git a/t/sgbr/sgbr_pos.t b/t/sgbr/sgbr_pos.t
index ce52bf3..417af0c 100644
--- a/t/sgbr/sgbr_pos.t
+++ b/t/sgbr/sgbr_pos.t
@@ -4,19 +4,19 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
use utf8;
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Create Document');
ok($doc->parse, 'Parse document');
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'Sgbr',
diff --git a/t/sgbr/sgbr_token.t b/t/sgbr/sgbr_token.t
index aaec3f5..cafa3c4 100644
--- a/t/sgbr/sgbr_token.t
+++ b/t/sgbr/sgbr_token.t
@@ -4,19 +4,19 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
use utf8;
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Create Document');
ok($doc->parse, 'Parse document');
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'Sgbr',
diff --git a/t/sort_tokens.t b/t/sort_tokens.t
index 011ab6d..1cbfd26 100644
--- a/t/sort_tokens.t
+++ b/t/sort_tokens.t
@@ -6,9 +6,9 @@
use utf8;
use lib 'lib', '../lib';
-use_ok('KorAP::Field::MultiTermTokenStream');
+use_ok('KorAP::XML::Field::MultiTermTokenStream');
-ok(my $mtt = KorAP::Field::MultiTermToken->new, 'New token');
+ok(my $mtt = KorAP::XML::Field::MultiTermToken->new, 'New token');
ok(defined $mtt->o_start(0), 'Set start character offset');
ok($mtt->o_end(5), 'Set end character offset');
ok($mtt->add(term => '@:k=N',
@@ -57,7 +57,7 @@
'c=N$<b>144|'.
'g=N$<b>144]', 'Check string');
-ok($mtt = KorAP::Field::MultiTermToken->new, 'New token');
+ok($mtt = KorAP::XML::Field::MultiTermToken->new, 'New token');
ok(defined $mtt->o_start(0), 'Set start character offset');
ok($mtt->o_end(5), 'Set end character offset');
diff --git a/t/tokens.t b/t/tokens.t
index 87fa584..59b83f3 100644
--- a/t/tokens.t
+++ b/t/tokens.t
@@ -1,6 +1,3 @@
-#!/usr/bin/env perl
-# source ~/perl5/perlbrew/etc/bashrc
-# perlbrew switch perl-blead@korap
use strict;
use warnings;
use utf8;
@@ -8,9 +5,9 @@
use Benchmark ':hireswallclock';
use lib 'lib', '../lib';
-use_ok('KorAP::Field::MultiTerm');
+use_ok('KorAP::XML::Field::MultiTerm');
-ok(my $term = KorAP::Field::MultiTerm->new(
+ok(my $term = KorAP::XML::Field::MultiTerm->new(
term => 'Baum',
p_start => 0,
p_end => 56,
@@ -27,7 +24,7 @@
is($term->payload, '<i>56');
is($term->to_string, 'Baum$<i>34<i>120<i>56<i>56');
-ok($term = KorAP::Field::MultiTerm->new(
+ok($term = KorAP::XML::Field::MultiTerm->new(
term => 'Baum'
), 'Create new object');
@@ -39,7 +36,7 @@
is($term->payload, undef);
is($term->to_string, 'Baum');
-ok($term = KorAP::Field::MultiTerm->new(
+ok($term = KorAP::XML::Field::MultiTerm->new(
term => 'Ba#um'
), 'Create new object');
@@ -51,7 +48,7 @@
is($term->payload, undef);
is($term->to_string, 'Ba\#um');
-ok($term = KorAP::Field::MultiTerm->new(
+ok($term = KorAP::XML::Field::MultiTerm->new(
term => 'Ba#u$m',
payload => '<i>45'
), 'Create new object');
diff --git a/t/transform.t b/t/transform.t
index a0e7f74..33706a4 100644
--- a/t/transform.t
+++ b/t/transform.t
@@ -12,7 +12,7 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
sub _t2h {
my $string = shift;
@@ -59,10 +59,10 @@
my $path = catdir(dirname(__FILE__), 'corpus/WPD/00001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
like($doc->path, qr!$path/$!, 'Path');
-ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::XML::Krill');
like($doc->path, qr!$path/$!, 'Path');
ok($doc->parse, 'Parse document');
@@ -83,9 +83,9 @@
is($doc->author, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
# Get tokens
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
# Get tokenization
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'OpenNLP',