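New structure is KorAP::XML::Krill (KorAP::Document is renamed to KorAP::XML::Krill; KorAP::Tokenizer and KorAP::Field move under KorAP::XML)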
Change-Id: I42297512b99acca4ab011306d11c095641397af5
diff --git a/t/index/TestInit.pm b/t/index/TestInit.pm
index f740cad..b03dad2 100644
--- a/t/index/TestInit.pm
+++ b/t/index/TestInit.pm
@@ -3,20 +3,20 @@
use warnings;
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
sub tokens {
my $file = shift;
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', $file);
- my $doc = KorAP::Document->new(
+ my $doc = KorAP::XML::Krill->new(
path => $path . '/'
) or return;
$doc->parse;
- my $tokens = KorAP::Tokenizer->new(
+ my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'OpenNLP',
diff --git a/t/index/connexor_sentences.t b/t/index/connexor_sentences.t
index ab9630f..83a2fb6 100644
--- a/t/index/connexor_sentences.t
+++ b/t/index/connexor_sentences.t
@@ -6,14 +6,14 @@
use Scalar::Util qw/weaken/;
use Data::Dumper;
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Load Korap::Document');
@@ -23,9 +23,9 @@
ok($doc->primary->data, 'Primary data in existence');
is($doc->primary->data_length, 129, 'Data length');
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'OpenNLP',
diff --git a/t/index/meta.t b/t/index/meta.t
index e5a01ba..44ee526 100644
--- a/t/index/meta.t
+++ b/t/index/meta.t
@@ -13,7 +13,7 @@
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
like($doc->path, qr!$path/!, 'Path');
diff --git a/t/index/primary.t b/t/index/primary.t
index 7abf629..e42453c 100644
--- a/t/index/primary.t
+++ b/t/index/primary.t
@@ -6,14 +6,14 @@
use Scalar::Util qw/weaken/;
use Data::Dumper;
use lib 't/index';
-use TestInit;
+use KorAP::XML::Krill;
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
like($doc->path, qr!$path/!, 'Path');
diff --git a/t/meta.t b/t/meta.t
index a51371a..51eb2be 100644
--- a/t/meta.t
+++ b/t/meta.t
@@ -8,14 +8,14 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
# WPD/00001
my $path = catdir(dirname(__FILE__), 'corpus/WPD/00001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
like($doc->path, qr!$path/!, 'Path');
-ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
like($doc->path, qr!$path/$!, 'Path');
ok($doc->parse, 'Parse document');
@@ -59,7 +59,7 @@
# BRZ13/00001
$path = catdir(dirname(__FILE__), 'corpus/BRZ13/00001');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->title, 'Sexueller Missbrauch –„Das schreiende Kind steckt noch tief in mir“', 'title');
@@ -88,7 +88,7 @@
# A01/13047
$path = catdir(dirname(__FILE__), 'corpus/A01/13047');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->title, 'Fischer und Kolp im Sonnenhügel', 'title');
@@ -115,7 +115,7 @@
# ERL/0001
$path = catdir(dirname(__FILE__), 'corpus/ERL/00001');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->title, 'Amtsblatt des Landesbezirks Baden [diverse Erlasse]', 'title'); # Amtsblatt des Landesbezirks Baden [diverse Erlasse]
@@ -149,7 +149,7 @@
# A01/02035-substring
$path = catdir(dirname(__FILE__), 'corpus/A00/02035-substring');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->title, 'St. Galler Tagblatt, 11.01.2000, Ressort: TB-RSP (Abk.)', 'title'); # A00/JAN.02035
@@ -176,7 +176,7 @@
# A01/02873-meta
$path = catdir(dirname(__FILE__), 'corpus/A00/02873-meta');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->title, 'Tradition und Moderne', 'title');
@@ -205,7 +205,7 @@
# A01/05663-unbalanced
$path = catdir(dirname(__FILE__), 'corpus/A00/05663-unbalanced');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->title, 'Mehr Arbeitslose im Dezember', 'title');
@@ -234,7 +234,7 @@
# A01/07452-deep
$path = catdir(dirname(__FILE__), 'corpus/A00/07452-deep');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->title, 'Wil im Dezember 1999', 'title');
@@ -262,10 +262,10 @@
# ART
$path = catdir(dirname(__FILE__), 'corpus/artificial');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
#is($doc->path, $path . '/', 'Path');
-ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
#is($doc->path, $path . '/', 'Path');
ok($doc->parse, 'Parse document');
@@ -299,10 +299,10 @@
# Multipath headers
$path = catdir(dirname(__FILE__), 'corpus/VDI/JAN/00001');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
like($doc->path, qr!$path/!, 'Path');
-ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
like($doc->path, qr!$path/$!, 'Path');
ok($doc->parse, 'Parse document');
@@ -352,7 +352,7 @@
# WDD
$path = catdir(dirname(__FILE__), 'corpus/WDD/G27/38989');
-ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
like($doc->path, qr!$path/!, 'Path');
ok($doc->parse, 'Parse document');
diff --git a/t/range.t b/t/range.t
index bd2538f..27c7970 100644
--- a/t/range.t
+++ b/t/range.t
@@ -3,9 +3,9 @@
use warnings;
use Test::More;
-use_ok('KorAP::Tokenizer::Range');
+use_ok('KorAP::XML::Tokenizer::Range');
-my $range = KorAP::Tokenizer::Range->new;
+my $range = KorAP::XML::Tokenizer::Range->new;
# Set a gap from 0 to 2, refering to position 0
$range->gap(0, 2, 0);
diff --git a/t/real/bzk.t b/t/real/bzk.t
index 3b39b25..acaf668 100644
--- a/t/real/bzk.t
+++ b/t/real/bzk.t
@@ -1,6 +1,3 @@
-#!/usr/bin/env perl
-# source ~/perl5/perlbrew/etc/bashrc
-# perlbrew switch perl-blead@korap
use strict;
use warnings;
use Test::More;
@@ -17,11 +14,11 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
my $path = catdir(dirname(__FILE__), '../corpus/BZK/D59/00001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->text_sigle, 'BZK_D59.00001', 'Correct text sigle');
@@ -69,12 +66,12 @@
ok(!$doc->doc_editor, 'Correct doc editor');
# Tokenization
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
# Get tokenization
-my $tokens = KorAP::Tokenizer->new(
+my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => $token_base_foundry,
diff --git a/t/real/bzk_2.t b/t/real/bzk_2.t
index 62a1a34..e4995c9 100644
--- a/t/real/bzk_2.t
+++ b/t/real/bzk_2.t
@@ -14,11 +14,11 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
my $path = catdir(dirname(__FILE__), '../corpus/BZK/D59/00089');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
ok($doc->parse, 'Parse document');
is($doc->text_sigle, 'BZK_D59.00089', 'Correct text sigle');
@@ -67,12 +67,12 @@
ok(!$doc->doc_editor, 'Correct doc editor');
# Tokenization
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
# Get tokenization
-my $tokens = KorAP::Tokenizer->new(
+my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => $token_base_foundry,
diff --git a/t/real/goethe.t b/t/real/goethe.t
index ae33e8d..84f239f 100644
--- a/t/real/goethe.t
+++ b/t/real/goethe.t
@@ -14,13 +14,13 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
# GOE/AGA/03828
my $path = catdir(dirname(__FILE__), '../corpus/GOE/AGA/03828');
# my $path = '/home/ndiewald/Repositories/korap/KorAP-sandbox/KorAP-lucene-indexer/t/GOE/AGA/03828';
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->text_sigle, 'GOE_AGA.03828', 'Correct text sigle');
@@ -63,12 +63,12 @@
ok(!$doc->doc_editor, 'Correct Doc editor');
# Tokenization
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
# Get tokenization
-my $tokens = KorAP::Tokenizer->new(
+my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => $token_base_foundry,
diff --git a/t/real/wdd.t b/t/real/wdd.t
index 4ed33d5..30059cd 100644
--- a/t/real/wdd.t
+++ b/t/real/wdd.t
@@ -1,6 +1,3 @@
-#!/usr/bin/env perl
-# source ~/perl5/perlbrew/etc/bashrc
-# perlbrew switch perl-blead@korap
use strict;
use warnings;
use Test::More;
@@ -17,12 +14,12 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
# GOE/AGA/03828
my $path = catdir(dirname(__FILE__), '../corpus/WDD/G27/38989');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');
is($doc->text_sigle, 'WDD11_G27.38989', 'Correct text sigle');
@@ -61,12 +58,12 @@
ok(!$doc->doc_editor, 'Correct doc editor');
# Tokenization
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
# Get tokenization
-my $tokens = KorAP::Tokenizer->new(
+my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => $token_base_foundry,
diff --git a/t/sgbr/sgbr_lemma.t b/t/sgbr/sgbr_lemma.t
index f9d6a0f..27fe073 100644
--- a/t/sgbr/sgbr_lemma.t
+++ b/t/sgbr/sgbr_lemma.t
@@ -4,19 +4,19 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
use utf8;
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Create Document');
ok($doc->parse, 'Parse document');
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'Sgbr',
diff --git a/t/sgbr/sgbr_meta.t b/t/sgbr/sgbr_meta.t
index 50891f8..53a096f 100644
--- a/t/sgbr/sgbr_meta.t
+++ b/t/sgbr/sgbr_meta.t
@@ -4,13 +4,13 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
use utf8;
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Create Document');
diff --git a/t/sgbr/sgbr_pos.t b/t/sgbr/sgbr_pos.t
index ce52bf3..417af0c 100644
--- a/t/sgbr/sgbr_pos.t
+++ b/t/sgbr/sgbr_pos.t
@@ -4,19 +4,19 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
use utf8;
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Create Document');
ok($doc->parse, 'Parse document');
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'Sgbr',
diff --git a/t/sgbr/sgbr_token.t b/t/sgbr/sgbr_token.t
index aaec3f5..cafa3c4 100644
--- a/t/sgbr/sgbr_token.t
+++ b/t/sgbr/sgbr_token.t
@@ -4,19 +4,19 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
-use KorAP::Tokenizer;
-use KorAP::Document;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
use utf8;
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
-ok(my $doc = KorAP::Document->new(
+ok(my $doc = KorAP::XML::Krill->new(
path => $path . '/'
), 'Create Document');
ok($doc->parse, 'Parse document');
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'Sgbr',
diff --git a/t/sort_tokens.t b/t/sort_tokens.t
index 011ab6d..1cbfd26 100644
--- a/t/sort_tokens.t
+++ b/t/sort_tokens.t
@@ -6,9 +6,9 @@
use utf8;
use lib 'lib', '../lib';
-use_ok('KorAP::Field::MultiTermTokenStream');
+use_ok('KorAP::XML::Field::MultiTermTokenStream');
-ok(my $mtt = KorAP::Field::MultiTermToken->new, 'New token');
+ok(my $mtt = KorAP::XML::Field::MultiTermToken->new, 'New token');
ok(defined $mtt->o_start(0), 'Set start character offset');
ok($mtt->o_end(5), 'Set end character offset');
ok($mtt->add(term => '@:k=N',
@@ -57,7 +57,7 @@
'c=N$<b>144|'.
'g=N$<b>144]', 'Check string');
-ok($mtt = KorAP::Field::MultiTermToken->new, 'New token');
+ok($mtt = KorAP::XML::Field::MultiTermToken->new, 'New token');
ok(defined $mtt->o_start(0), 'Set start character offset');
ok($mtt->o_end(5), 'Set end character offset');
diff --git a/t/tokens.t b/t/tokens.t
index 87fa584..59b83f3 100644
--- a/t/tokens.t
+++ b/t/tokens.t
@@ -1,6 +1,3 @@
-#!/usr/bin/env perl
-# source ~/perl5/perlbrew/etc/bashrc
-# perlbrew switch perl-blead@korap
use strict;
use warnings;
use utf8;
@@ -8,9 +5,9 @@
use Benchmark ':hireswallclock';
use lib 'lib', '../lib';
-use_ok('KorAP::Field::MultiTerm');
+use_ok('KorAP::XML::Field::MultiTerm');
-ok(my $term = KorAP::Field::MultiTerm->new(
+ok(my $term = KorAP::XML::Field::MultiTerm->new(
term => 'Baum',
p_start => 0,
p_end => 56,
@@ -27,7 +24,7 @@
is($term->payload, '<i>56');
is($term->to_string, 'Baum$<i>34<i>120<i>56<i>56');
-ok($term = KorAP::Field::MultiTerm->new(
+ok($term = KorAP::XML::Field::MultiTerm->new(
term => 'Baum'
), 'Create new object');
@@ -39,7 +36,7 @@
is($term->payload, undef);
is($term->to_string, 'Baum');
-ok($term = KorAP::Field::MultiTerm->new(
+ok($term = KorAP::XML::Field::MultiTerm->new(
term => 'Ba#um'
), 'Create new object');
@@ -51,7 +48,7 @@
is($term->payload, undef);
is($term->to_string, 'Ba\#um');
-ok($term = KorAP::Field::MultiTerm->new(
+ok($term = KorAP::XML::Field::MultiTerm->new(
term => 'Ba#u$m',
payload => '<i>45'
), 'Create new object');
diff --git a/t/transform.t b/t/transform.t
index a0e7f74..33706a4 100644
--- a/t/transform.t
+++ b/t/transform.t
@@ -12,7 +12,7 @@
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
-use_ok('KorAP::Document');
+use_ok('KorAP::XML::Krill');
sub _t2h {
my $string = shift;
@@ -59,10 +59,10 @@
my $path = catdir(dirname(__FILE__), 'corpus/WPD/00001');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
like($doc->path, qr!$path/$!, 'Path');
-ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
like($doc->path, qr!$path/$!, 'Path');
ok($doc->parse, 'Parse document');
@@ -83,9 +83,9 @@
is($doc->author, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
# Get tokens
-use_ok('KorAP::Tokenizer');
+use_ok('KorAP::XML::Tokenizer');
# Get tokenization
-ok(my $tokens = KorAP::Tokenizer->new(
+ok(my $tokens = KorAP::XML::Tokenizer->new(
path => $doc->path,
doc => $doc,
foundry => 'OpenNLP',