Minor improvements by introducing getters and setters instead of combinators in tokenizer
Change-Id: I31425a3ac991efb131d33a18b6ff012098ddeac8
diff --git a/lib/KorAP/XML/Annotation/Base/Paragraphs.pm b/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
index 235e2ff..694dfc8 100644
--- a/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
+++ b/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
@@ -10,13 +10,13 @@
layer => 'paragraph',
cb => sub {
my ($stream, $span) = @_;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
$mtt->add(
term => '<>:base/s:p',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
payload => '<b>1',
pti => 64
);
diff --git a/lib/KorAP/XML/Annotation/Base/Sentences.pm b/lib/KorAP/XML/Annotation/Base/Sentences.pm
index 1d66c8a..852146f 100644
--- a/lib/KorAP/XML/Annotation/Base/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/Base/Sentences.pm
@@ -12,33 +12,23 @@
layer => 'sentences',
cb => sub {
my ($stream, $span) = @_;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
- $first = [$span->p_start, $span->o_start] unless defined $first;
+ $first = [$span->get_p_start, $span->get_o_start] unless defined $first;
$mtt->add(
term => '<>:base/s:s',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
payload => '<b>2',
pti => 64
);
- $last_p = $span->p_end;
- $last_o = $span->o_end;
+ $last_p = $span->get_p_end;
+ $last_o = $span->get_o_end;
$i++;
}
) or return;
- # my $mt = $$self->stream->pos($first->[0]);
- # $mt->add(
- # term => '<>:base/s:t',
- # o_start => $first->[1],
- # p_end => $last_p,
- # o_end => $last_o,
- # payload => '<b>0',
- # pti => 64
- # );
-
$$self->stream->add_meta('base/sentences', '<i>' . $i);
return 1;
diff --git a/lib/KorAP/XML/Annotation/CMC/Morpho.pm b/lib/KorAP/XML/Annotation/CMC/Morpho.pm
index 731950a..29c563a 100644
--- a/lib/KorAP/XML/Annotation/CMC/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/CMC/Morpho.pm
@@ -9,9 +9,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $array = $content->{fs}->{f} or return;
@@ -42,6 +42,6 @@
sub layer_info {
['cmc/l=tokens', 'cmc/p=tokens']
-}
+};
1;
diff --git a/lib/KorAP/XML/Annotation/Connexor/Morpho.pm b/lib/KorAP/XML/Annotation/Connexor/Morpho.pm
index 9f79305..6500ab6 100644
--- a/lib/KorAP/XML/Annotation/Connexor/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/Connexor/Morpho.pm
@@ -27,9 +27,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
@@ -37,39 +37,38 @@
for my $f (@$features) {
- # Lemma
- if (($f->{-name} eq 'lemma') && ($found = $f->{'#text'})) {
- if (index($found, "\N{U+00a0}") >= 0) {
- foreach (split(/\x{00A0}/, $found)) {
- $mtt->add(
- term => 'cnx/l:' . $_
- );
- }
- }
- else {
- $mtt->add(
- term => 'cnx/l:' . $found
- );
- };
- }
+ # Lemma
+ if (($f->{-name} eq 'lemma') && ($found = $f->{'#text'})) {
+ if (index($found, "\N{U+00a0}") >= 0) {
+ foreach (split(/\x{00A0}/, $found)) {
+ $mtt->add(
+ term => 'cnx/l:' . $_
+ );
+ }
+ }
+ else {
+ $mtt->add(
+ term => 'cnx/l:' . $found
+ );
+ };
+ }
- # POS
- elsif (($f->{-name} eq 'pos') && ($found = $f->{'#text'})) {
- $mtt->add(
- term => 'cnx/p:' . $found
- );
-
- }
- # MSD
- # This could follow
- # http://www.ids-mannheim.de/cosmas2/projekt/referenz/connexor/morph.html
- elsif (($f->{-name} eq 'msd') && ($found = $f->{'#text'})) {
- foreach (split(':', $found)) {
- $mtt->add(
- term => 'cnx/m:' . $_
- );
- };
- };
+ # POS
+ elsif (($f->{-name} eq 'pos') && ($found = $f->{'#text'})) {
+ $mtt->add(
+ term => 'cnx/p:' . $found
+ );
+ }
+ # MSD
+ # This could follow
+ # http://www.ids-mannheim.de/cosmas2/projekt/referenz/connexor/morph.html
+ elsif (($f->{-name} eq 'msd') && ($found = $f->{'#text'})) {
+ foreach (split(':', $found)) {
+ $mtt->add(
+ term => 'cnx/m:' . $_
+ );
+ };
+ };
};
}
) or return;
@@ -78,7 +77,7 @@
};
sub layer_info {
- ['cnx/l=tokens', 'cnx/p=tokens', 'cnx/m=tokens'];
+ ['cnx/l=tokens', 'cnx/p=tokens', 'cnx/m=tokens'];
};
diff --git a/lib/KorAP/XML/Annotation/Connexor/Phrase.pm b/lib/KorAP/XML/Annotation/Connexor/Phrase.pm
index 1fb5901..754cf4f 100644
--- a/lib/KorAP/XML/Annotation/Connexor/Phrase.pm
+++ b/lib/KorAP/XML/Annotation/Connexor/Phrase.pm
@@ -10,22 +10,22 @@
cb => sub {
my ($stream, $span) = @_;
- my $content = $span->hash->{fs}->{f};
+ my $content = $span->get_hash->{fs}->{f};
return if $content->{-name} ne 'pos';
my $type = $content->{'#text'};
if ($type) {
- my $mtt = $stream->pos($span->p_start);
- $mtt->add(
- term => '<>:cnx/c:' . $type,
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
- pti => 64,
- payload => '<b>0' # Pseudo-depth
- );
+ my $mtt = $stream->pos($span->get_p_start);
+ $mtt->add(
+ term => '<>:cnx/c:' . $type,
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
+ pti => 64,
+ payload => '<b>0' # Pseudo-depth
+ );
};
}
) or return;
diff --git a/lib/KorAP/XML/Annotation/Connexor/Sentences.pm b/lib/KorAP/XML/Annotation/Connexor/Sentences.pm
index 52798b4..b36956f 100644
--- a/lib/KorAP/XML/Annotation/Connexor/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/Connexor/Sentences.pm
@@ -10,14 +10,14 @@
layer => 'sentences',
cb => sub {
my ($stream, $span) = @_;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
$mtt->add(
- term => '<>:cnx/s:s',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
- pti => 64,
- payload => '<b>0'
+ term => '<>:cnx/s:s',
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
+ pti => 64,
+ payload => '<b>0'
);
$i++;
}
diff --git a/lib/KorAP/XML/Annotation/Connexor/Syntax.pm b/lib/KorAP/XML/Annotation/Connexor/Syntax.pm
index a75e4cd..171e0ec 100644
--- a/lib/KorAP/XML/Annotation/Connexor/Syntax.pm
+++ b/lib/KorAP/XML/Annotation/Connexor/Syntax.pm
@@ -9,17 +9,17 @@
layer => 'syntax',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
my $found;
- my $spans = $token->hash->{fs}->{f}->{fs}->{f};
+ my $spans = $token->get_hash->{fs}->{f}->{fs}->{f};
# syntax
foreach (@$spans) {
- if (($_->{-name} eq 'pos') && ($found = $_->{'#text'})) {
- $mtt->add(
- term => 'cnx/syn:' . $found
- );
- };
+ if (($_->{-name} eq 'pos') && ($found = $_->{'#text'})) {
+ $mtt->add(
+ term => 'cnx/syn:' . $found
+ );
+ };
};
}) or return;
diff --git a/lib/KorAP/XML/Annotation/CoreNLP/Constituency.pm b/lib/KorAP/XML/Annotation/CoreNLP/Constituency.pm
index 8b712e5..849a3c7 100644
--- a/lib/KorAP/XML/Annotation/CoreNLP/Constituency.pm
+++ b/lib/KorAP/XML/Annotation/CoreNLP/Constituency.pm
@@ -17,26 +17,26 @@
cb => sub {
my ($stream, $span) = @_;
- $corenlp_const{$span->id} = $span;
+ $corenlp_const{$span->get_id} = $span;
# Maybe root
- $corenlp_const_root->insert($span->id);
+ $corenlp_const_root->insert($span->get_id);
- my $rel = $span->hash->{rel} or return;
+ my $rel = $span->get_hash->{rel} or return;
# Make rel an array in case it's not
$rel = [$rel] unless ref $rel eq 'ARRAY';
foreach (@$rel) {
- if ($_->{-label} eq 'dominates') {
- if ($_->{-target}) {
- $corenlp_const_noroot->insert($_->{-target});
- }
- elsif (my $uri = $_->{-uri}) {
- $uri =~ s/^morpho\.xml#//;
- $corenlp_const_noroot->insert($uri);
- };
- };
+ if ($_->{-label} eq 'dominates') {
+ if ($_->{-target}) {
+ $corenlp_const_noroot->insert($_->{-target});
+ }
+ elsif (my $uri = $_->{-uri}) {
+ $uri =~ s/^morpho\.xml#//;
+ $corenlp_const_noroot->insert($uri);
+ };
+ };
};
}
) or return;
@@ -50,9 +50,9 @@
$add_const = sub {
my $span = shift;
my $level = shift;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
- my $content = $span->hash;
+ my $content = $span->get_hash;
my $f = $content->{fs}->{f};
return unless $f->{-name} eq 'const';
@@ -61,9 +61,9 @@
# $type is now NPA, NP, NUM ...
my %term = (
term => '<>:corenlp/c:' . $type,
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
pti => 64
);
diff --git a/lib/KorAP/XML/Annotation/CoreNLP/Morpho.pm b/lib/KorAP/XML/Annotation/CoreNLP/Morpho.pm
index 6711fe0..97a139f 100644
--- a/lib/KorAP/XML/Annotation/CoreNLP/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/CoreNLP/Morpho.pm
@@ -9,16 +9,16 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f} or return;
+ my $content = $token->get_hash->{fs}->{f} or return;
$content = $content->{fs}->{f};
# syntax
if (($content->{-name} eq 'pos') && ($content->{'#text'})) {
- $mtt->add(
- term => 'corenlp/p:' . $content->{'#text'}
- );
+ $mtt->add(
+ term => 'corenlp/p:' . $content->{'#text'}
+ );
};
}) or return;
diff --git a/lib/KorAP/XML/Annotation/CoreNLP/NamedEntities.pm b/lib/KorAP/XML/Annotation/CoreNLP/NamedEntities.pm
index 317b9e9..0a9862e 100644
--- a/lib/KorAP/XML/Annotation/CoreNLP/NamedEntities.pm
+++ b/lib/KorAP/XML/Annotation/CoreNLP/NamedEntities.pm
@@ -14,19 +14,19 @@
layer => $model // lc('NamedEntities'),
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f} or return;
+ my $content = $token->get_hash->{fs}->{f} or return;
my $found;
if (($content->{-name} eq 'ne') &&
- ($found = $content->{fs}) &&
- ($found = $found->{f}) &&
- ($found->{-name} eq 'ent') &&
- ($found = $found->{'#text'})) {
- $mtt->add(
- term => 'corenlp/ne:' . $found
- );
+ ($found = $content->{fs}) &&
+ ($found = $found->{f}) &&
+ ($found->{-name} eq 'ent') &&
+ ($found = $found->{'#text'})) {
+ $mtt->add(
+ term => 'corenlp/ne:' . $found
+ );
};
}) or return;
@@ -34,7 +34,7 @@
};
sub layer_info {
- ['corenlp/ne=tokens'];
+ ['corenlp/ne=tokens'];
};
1;
diff --git a/lib/KorAP/XML/Annotation/CoreNLP/Sentences.pm b/lib/KorAP/XML/Annotation/CoreNLP/Sentences.pm
index f643526..e6b4cc0 100644
--- a/lib/KorAP/XML/Annotation/CoreNLP/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/CoreNLP/Sentences.pm
@@ -10,14 +10,14 @@
layer => 'sentences',
cb => sub {
my ($stream, $span) = @_;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
$mtt->add(
- term => '<>:corenlp/s:s',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
- pti => 64,
- payload => '<b>0' # Could also be 2 for t/p/s
+ term => '<>:corenlp/s:s',
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
+ pti => 64,
+ payload => '<b>0' # Could also be 2 for t/p/s
);
$i++;
}
diff --git a/lib/KorAP/XML/Annotation/DGD/Morpho.pm b/lib/KorAP/XML/Annotation/DGD/Morpho.pm
index 49ccba8..0aed712 100644
--- a/lib/KorAP/XML/Annotation/DGD/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/DGD/Morpho.pm
@@ -10,10 +10,10 @@
layer => 'annot',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
- my $tui = $stream->tui($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
+ my $tui = $stream->tui($token->get_pos);
- my $content = $token->hash->{fs}->{f} or return;
+ my $content = $token->get_hash->{fs}->{f} or return;
$content = $content->{fs}->{f};
$content = [$content] unless ref $content eq 'ARRAY';
diff --git a/lib/KorAP/XML/Annotation/DGD/Structure.pm b/lib/KorAP/XML/Annotation/DGD/Structure.pm
index 4b49349..0643caf 100644
--- a/lib/KorAP/XML/Annotation/DGD/Structure.pm
+++ b/lib/KorAP/XML/Annotation/DGD/Structure.pm
@@ -23,7 +23,7 @@
my ($stream, $span) = @_;
# Read feature
- my $feature = $span->hash->{fs}->{f};
+ my $feature = $span->get_hash->{fs}->{f};
my $attrs;
# Get attributes
@@ -38,10 +38,10 @@
# Check only for anchors
if ($name eq 'anchor') {
- push @milestones, [ $span->p_start, $span->o_start ];
+ push @milestones, [ $span->get_p_start, $span->get_o_start ];
} else {
- $last_p = $span->p_start;
- $last_o = $span->o_end;
+ $last_p = $span->get_p_start;
+ $last_o = $span->get_o_end;
}
}
) or return;
diff --git a/lib/KorAP/XML/Annotation/DRuKoLa/Morpho.pm b/lib/KorAP/XML/Annotation/DRuKoLa/Morpho.pm
index 4cf6d50..efdf5ed 100644
--- a/lib/KorAP/XML/Annotation/DRuKoLa/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/DRuKoLa/Morpho.pm
@@ -9,9 +9,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
diff --git a/lib/KorAP/XML/Annotation/DeReKo/Structure.pm b/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
index d56173a..c60a3b4 100644
--- a/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
+++ b/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
@@ -16,10 +16,10 @@
my $tui = 0;
# Get starting position
- my $p_start = $span->p_start;
+ my $p_start = $span->get_p_start;
# Read feature
- my $feature = $span->hash->{fs}->{f};
+ my $feature = $span->get_hash->{fs}->{f};
my $attrs;
# Get attributes
@@ -46,19 +46,19 @@
return;
};
- my $p_end = $span->p_end;
+ my $p_end = $span->get_p_end;
# Add structure
my $mt = $mtt->add(
term => '<>:dereko/s:' . $name,
- o_start => $span->o_start,
- o_end => $span->o_end,
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
p_start => $p_start,
p_end => $p_end,
- pti => $span->milestone ? 65 : 64,
+ pti => $span->get_milestone ? 65 : 64,
);
- my $level = $span->hash->{'-l'};
+ my $level = $span->get_hash->{'-l'};
if ($level || $tui) {
my $pl;
$pl .= '<b>' . ($level ? $level - 1 : 0);
@@ -95,7 +95,7 @@
if (my $nr = first { $_->{-name} eq 'n' } @$attrs) {
if (($nr = $nr->{'#text'}) && looks_like_number($nr)) {
my $mt2 = $mtt->add('~:base/s:pb');
- $mt2->set_payload('<i>' . $nr . '<i>' . $span->o_start);
+ $mt2->set_payload('<i>' . $nr . '<i>' . $span->get_o_start);
$mt2->set_stored_offsets(0);
};
};
@@ -115,7 +115,7 @@
p_start => $p_start,
pti => 17,
payload => '<s>' . $tui .
- ($span->milestone ? '' : '<i>' . $p_end)
+ ($span->get_milestone ? '' : '<i>' . $p_end)
);
};
};
diff --git a/lib/KorAP/XML/Annotation/Glemm/Morpho.pm b/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
index c4ccea5..61f4d9b 100644
--- a/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
@@ -12,40 +12,40 @@
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{'fs'}->{'f'} or return;
+ my $content = $token->get_hash->{'fs'}->{'f'} or return;
# All interpretations
foreach (ref $content eq 'ARRAY' ? @$content : $content) {
- # All features
- $content = $_->{'fs'}->{'f'};
+ # All features
+ $content = $_->{'fs'}->{'f'};
- my $lemma;
- my ($composition, $derivation) = (0,0);
+ my $lemma;
+ my ($composition, $derivation) = (0,0);
- # Iterate over
- foreach (ref $content eq 'ARRAY' ? @$content : $content) {
+ # Iterate over
+ foreach (ref $content eq 'ARRAY' ? @$content : $content) {
- # syntax
- if (($_->{-name} eq 'lemma') && $_->{'#text'}) {
- $lemma = $_->{'#text'};
- }
- elsif ($_->{-name} eq 'composition' && $_->{'#text'} eq 'true') {
- $composition = 1;
- }
- elsif ($_->{-name} eq 'derivation' && $_->{'#text'} eq 'true') {
- $derivation = 1;
- };
- };
+ # syntax
+ if (($_->{-name} eq 'lemma') && $_->{'#text'}) {
+ $lemma = $_->{'#text'};
+ }
+ elsif ($_->{-name} eq 'composition' && $_->{'#text'} eq 'true') {
+ $composition = 1;
+ }
+ elsif ($_->{-name} eq 'derivation' && $_->{'#text'} eq 'true') {
+ $derivation = 1;
+ };
+ };
- $mtt->add(
- term => 'glemm/l:' .
- ($composition ? '+' : '_') .
- ($derivation ? '+' : '_') .
- $lemma
- ) if $lemma;
+ $mtt->add(
+ term => 'glemm/l:' .
+ ($composition ? '+' : '_') .
+ ($derivation ? '+' : '_') .
+ $lemma
+ ) if $lemma;
};
}) or return;
@@ -53,7 +53,7 @@
};
sub layer_info {
- ['glemm/l=tokens'];
+ ['glemm/l=tokens'];
};
1;
diff --git a/lib/KorAP/XML/Annotation/HNC/Morpho.pm b/lib/KorAP/XML/Annotation/HNC/Morpho.pm
index 8128015..4b2e5f4 100644
--- a/lib/KorAP/XML/Annotation/HNC/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/HNC/Morpho.pm
@@ -10,9 +10,9 @@
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
diff --git a/lib/KorAP/XML/Annotation/LWC/Dependency.pm b/lib/KorAP/XML/Annotation/LWC/Dependency.pm
index ffc3875..b7532f4 100644
--- a/lib/KorAP/XML/Annotation/LWC/Dependency.pm
+++ b/lib/KorAP/XML/Annotation/LWC/Dependency.pm
@@ -14,10 +14,10 @@
my ($stream, $source, $tokens) = @_;
# Get MultiTermToken from stream for source
- my $mtt = $stream->pos($source->pos);
+ my $mtt = $stream->pos($source->get_pos);
# Serialized information from token
- my $content = $source->hash;
+ my $content = $source->get_hash;
# Get relation information
my $rel = $content->{rel};
@@ -42,18 +42,18 @@
term => '>:lwc/d:' . $label,
pti => 32, # term-to-term relation
payload =>
- '<i>' . $target->pos # . # right part token position
+ '<i>' . $target->get_pos # . # right part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_term->tui # right part tui
);
- my $target_mtt = $stream->pos($target->pos);
+ my $target_mtt = $stream->pos($target->get_pos);
$target_mtt->add(
term => '<:lwc/d:' . $label,
pti => 32, # term-to-term relation
payload =>
- '<i>' . $source->pos # . # left part token position
+ '<i>' . $source->get_pos # . # left part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_term->tui # right part tui
);
@@ -66,23 +66,23 @@
term => '>:lwc/d:' . $label,
pti => 33, # term-to-element relation
payload =>
- '<i>' . $target->o_start . # end position
- '<i>' . $target->o_end . # end position
- '<i>' . $target->p_start . # right part start position
- '<i>' . $target->p_end # . # right part end position
+ '<i>' . $target->get_o_start . # start position
+ '<i>' . $target->get_o_end . # end position
+ '<i>' . $target->get_p_start . # right part start position
+ '<i>' . $target->get_p_end # . # right part end position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_span->tui # right part tui
);
- my $target_mtt = $stream->pos($target->p_start);
+ my $target_mtt = $stream->pos($target->get_p_start);
$target_mtt->add(
term => '<:lwc/d:' . $label,
pti => 34, # element-to-term relation
payload =>
- '<i>' . $target->o_start . # end position
- '<i>' . $target->o_end . # end position
- '<i>' . $target->p_end . # right part end position
- '<i>' . $source->pos # . # left part token position
+ '<i>' . $target->get_o_start . # start position
+ '<i>' . $target->get_o_end . # end position
+ '<i>' . $target->get_p_end . # right part end position
+ '<i>' . $source->get_pos # . # left part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_span->tui # right part tui
);
diff --git a/lib/KorAP/XML/Annotation/MDParser/Dependency.pm b/lib/KorAP/XML/Annotation/MDParser/Dependency.pm
index f8a14b6..8816f1f 100644
--- a/lib/KorAP/XML/Annotation/MDParser/Dependency.pm
+++ b/lib/KorAP/XML/Annotation/MDParser/Dependency.pm
@@ -17,10 +17,10 @@
my ($stream, $source, $tokens) = @_;
# Get MultiTermToken from stream for source
- my $mtt = $stream->pos($source->pos);
+ my $mtt = $stream->pos($source->get_pos);
# Serialized information from token
- my $content = $source->hash;
+ my $content = $source->get_hash;
# Get relation information
my $rel = $content->{rel};
@@ -28,72 +28,71 @@
# Iterate over relations
foreach (@$rel) {
- my $label = $_->{-label};
+ my $label = $_->{-label};
- #my $target = $stream->tui($source->pos);
- my $from = $_->{span}->{-from};
- my $to = $_->{span}->{-to};
+ #my $target = $stream->tui($source->pos);
+ my $from = $_->{span}->{-from};
+ my $to = $_->{span}->{-to};
- # Target
- my $target = $tokens->token($from, $to);
+ # Target
+ my $target = $tokens->token($from, $to);
- # Relation is term-to-term with a found target!
- if ($target) {
+ # Relation is term-to-term with a found target!
+ if ($target) {
- # Unary means, it refers to itself!
- $mtt->add(
- term => '>:mdp/d:' . $label,
- pti => 32, # term-to-term relation
- payload =>
- '<i>' . $target->pos # . # right part token position
- # '<s>0' . # $source_term->tui . # left part tui
- # '<s>0' # . $target_term->tui # right part tui
- );
+ # Unary means, it refers to itself!
+ $mtt->add(
+ term => '>:mdp/d:' . $label,
+ pti => 32, # term-to-term relation
+ payload =>
+ '<i>' . $target->get_pos # . # right part token position
+ # '<s>0' . # $source_term->tui . # left part tui
+ # '<s>0' # . $target_term->tui # right part tui
+ );
- my $target_mtt = $stream->pos($target->pos);
+ my $target_mtt = $stream->pos($target->get_pos);
- $target_mtt->add(
- term => '<:mdp/d:' . $label,
- pti => 32, # term-to-term relation
- payload =>
- '<i>' . $source->pos # . # left part token position
- # '<s>0' . # $source_term->tui . # left part tui
- # '<s>0' # . $target_term->tui # right part tui
- );
- }
+ $target_mtt->add(
+ term => '<:mdp/d:' . $label,
+ pti => 32, # term-to-term relation
+ payload =>
+ '<i>' . $source->get_pos # . # left part token position
+ # '<s>0' . # $source_term->tui . # left part tui
+ # '<s>0' # . $target_term->tui # right part tui
+ );
+ }
- # Relation is possibly term-to-element with a found target!
- elsif ($target = $tokens->span($from, $to)) {
- $mtt->add(
- term => '>:mdp/d:' . $label,
- pti => 33, # term-to-element relation
- payload =>
- '<i>' . $target->o_start . # end position
- '<i>' . $target->o_end . # end position
- '<i>' . $target->p_start . # right part start position
- '<i>' . $target->p_end # . # right part end position
- # '<s>0' . # $source_term->tui . # left part tui
- # '<s>0' # . $target_span->tui # right part tui
- );
+ # Relation is possibly term-to-element with a found target!
+ elsif ($target = $tokens->span($from, $to)) {
+ $mtt->add(
+ term => '>:mdp/d:' . $label,
+ pti => 33, # term-to-element relation
+ payload =>
+ '<i>' . $target->get_o_start . # start position
+ '<i>' . $target->get_o_end . # end position
+ '<i>' . $target->get_p_start . # right part start position
+ '<i>' . $target->get_p_end # . # right part end position
+ # '<s>0' . # $source_term->tui . # left part tui
+ # '<s>0' # . $target_span->tui # right part tui
+ );
- my $target_mtt = $stream->pos($target->p_start);
- $target_mtt->add(
- term => '<:mdp/d:' . $label,
- pti => 34, # element-to-term relation
- payload =>
- '<i>' . $target->o_start . # end position
- '<i>' . $target->o_end . # end position
- '<i>' . $target->p_end . # right part end position
- '<i>' . $source->pos # . # left part token position
- # '<s>0' . # $source_term->tui . # left part tui
- # '<s>0' # . $target_span->tui # right part tui
-
- );
- }
- else {
- use Data::Dumper;
- $$self->log->warn('Relation currently not supported: ' . Dumper($content));
- };
+ my $target_mtt = $stream->pos($target->get_p_start);
+ $target_mtt->add(
+ term => '<:mdp/d:' . $label,
+ pti => 34, # element-to-term relation
+ payload =>
+ '<i>' . $target->get_o_start . # start position
+ '<i>' . $target->get_o_end . # end position
+ '<i>' . $target->get_p_end . # right part end position
+ '<i>' . $source->get_pos # . # left part token position
+ # '<s>0' . # $source_term->tui . # left part tui
+ # '<s>0' # . $target_span->tui # right part tui
+ );
+ }
+ else {
+ use Data::Dumper;
+ $$self->log->warn('Relation currently not supported: ' . Dumper($content));
+ };
};
}) or return;
diff --git a/lib/KorAP/XML/Annotation/Malt/Dependency.pm b/lib/KorAP/XML/Annotation/Malt/Dependency.pm
index a7bd31b..8ea01a5 100644
--- a/lib/KorAP/XML/Annotation/Malt/Dependency.pm
+++ b/lib/KorAP/XML/Annotation/Malt/Dependency.pm
@@ -17,10 +17,10 @@
my ($stream, $source, $tokens) = @_;
# Get MultiTermToken from stream for source
- my $mtt = $stream->pos($source->pos);
+ my $mtt = $stream->pos($source->get_pos);
# Serialized information from token
- my $content = $source->hash;
+ my $content = $source->get_hash;
# Get relation information
my $rel = $content->{rel};
@@ -28,72 +28,71 @@
# Iterate over relations
foreach (@$rel) {
- my $label = $_->{-label};
+ my $label = $_->{-label};
- #my $target = $stream->tui($source->pos);
- my $from = $_->{span}->{-from};
- my $to = $_->{span}->{-to};
+ #my $target = $stream->tui($source->pos);
+ my $from = $_->{span}->{-from};
+ my $to = $_->{span}->{-to};
- # Target
- my $target = $tokens->token($from, $to);
+ # Target
+ my $target = $tokens->token($from, $to);
- # Relation is term-to-term with a found target!
- if ($target) {
+ # Relation is term-to-term with a found target!
+ if ($target) {
- # Unary means, it refers to itself!
- $mtt->add(
- term => '>:malt/d:' . $label,
- pti => 32, # term-to-term relation
- payload =>
- '<i>' . $target->pos # . # right part token position
- # '<s>0' . # $source_term->tui . # left part tui
- # '<s>0' # . $target_term->tui # right part tui
- );
+ # Unary means, it refers to itself!
+ $mtt->add(
+ term => '>:malt/d:' . $label,
+ pti => 32, # term-to-term relation
+ payload =>
+ '<i>' . $target->get_pos # . # right part token position
+ # '<s>0' . # $source_term->tui . # left part tui
+ # '<s>0' # . $target_term->tui # right part tui
+ );
- my $target_mtt = $stream->pos($target->pos);
+ my $target_mtt = $stream->pos($target->get_pos);
- $target_mtt->add(
- term => '<:malt/d:' . $label,
- pti => 32, # term-to-term relation
- payload =>
- '<i>' . $source->pos # . # left part token position
- # '<s>0' . # $source_term->tui . # left part tui
- # '<s>0' # . $target_term->tui # right part tui
- );
- }
+ $target_mtt->add(
+ term => '<:malt/d:' . $label,
+ pti => 32, # term-to-term relation
+ payload =>
+ '<i>' . $source->get_pos # . # left part token position
+ # '<s>0' . # $source_term->tui . # left part tui
+ # '<s>0' # . $target_term->tui # right part tui
+ );
+ }
- # Relation is possibly term-to-element with a found target!
- elsif ($target = $tokens->span($from, $to)) {
- $mtt->add(
- term => '>:malt/d:' . $label,
- pti => 33, # term-to-element relation
- payload =>
- '<i>' . $target->o_start . # end position
- '<i>' . $target->o_end . # end position
- '<i>' . $target->p_start . # right part start position
- '<i>' . $target->p_end # . # right part end position
- # '<s>0' . # $source_term->tui . # left part tui
- # '<s>0' # . $target_span->tui # right part tui
- );
+ # Relation is possibly term-to-element with a found target!
+ elsif ($target = $tokens->span($from, $to)) {
+ $mtt->add(
+ term => '>:malt/d:' . $label,
+ pti => 33, # term-to-element relation
+ payload =>
+ '<i>' . $target->get_o_start . # start position
+ '<i>' . $target->get_o_end . # end position
+ '<i>' . $target->get_p_start . # right part start position
+ '<i>' . $target->get_p_end # . # right part end position
+ # '<s>0' . # $source_term->tui . # left part tui
+ # '<s>0' # . $target_span->tui # right part tui
+ );
- my $target_mtt = $stream->pos($target->p_start);
- $target_mtt->add(
- term => '<:malt/d:' . $label,
- pti => 34, # element-to-term relation
- payload =>
- '<i>' . $target->o_start . # end position
- '<i>' . $target->o_end . # end position
- '<i>' . $target->p_end . # right part end position
- '<i>' . $source->pos # . # left part token position
- # '<s>0' . # $source_term->tui . # left part tui
- # '<s>0' # . $target_span->tui # right part tui
-
- );
- }
- else {
- use Data::Dumper;
- $$self->log->warn('Relation currently not supported: ' . Dumper($content));
- };
+ my $target_mtt = $stream->pos($target->get_p_start);
+ $target_mtt->add(
+ term => '<:malt/d:' . $label,
+ pti => 34, # element-to-term relation
+ payload =>
+ '<i>' . $target->get_o_start . # start position
+ '<i>' . $target->get_o_end . # end position
+ '<i>' . $target->get_p_end . # right part end position
+ '<i>' . $source->get_pos # . # left part token position
+ # '<s>0' . # $source_term->tui . # left part tui
+ # '<s>0' # . $target_span->tui # right part tui
+ );
+ }
+ else {
+ use Data::Dumper;
+ $$self->log->warn('Relation currently not supported: ' . Dumper($content));
+ };
};
}) or return;
diff --git a/lib/KorAP/XML/Annotation/MarMoT/Morpho.pm b/lib/KorAP/XML/Annotation/MarMoT/Morpho.pm
index 212d8fb..ced57ab 100644
--- a/lib/KorAP/XML/Annotation/MarMoT/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/MarMoT/Morpho.pm
@@ -10,9 +10,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
diff --git a/lib/KorAP/XML/Annotation/Mate/Dependency.pm b/lib/KorAP/XML/Annotation/Mate/Dependency.pm
index 6d242c9..7a85859 100644
--- a/lib/KorAP/XML/Annotation/Mate/Dependency.pm
+++ b/lib/KorAP/XML/Annotation/Mate/Dependency.pm
@@ -19,10 +19,10 @@
my ($stream, $source, $tokens) = @_;
# Get MultiTermToken from stream for source
- my $mtt = $stream->pos($source->pos);
+ my $mtt = $stream->pos($source->get_pos);
# Serialized information from token
- my $content = $source->hash;
+ my $content = $source->get_hash;
# Get relation information
my $rel = $content->{rel};
@@ -46,7 +46,7 @@
# );
# Target is at the same position!
- my $pos = $source->pos;
+ my $pos = $source->get_pos;
my %rel = (
pti => 32, # term-to-term relation
@@ -95,17 +95,17 @@
term => '>:mate/d:' . $label,
pti => 32, # term-to-term relation
payload =>
- '<i>' . $target->pos # . # right part token position
+ '<i>' . $target->get_pos # . # right part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_term->tui # right part tui
);
- my $target_mtt = $stream->pos($target->pos);
+ my $target_mtt = $stream->pos($target->get_pos);
$target_mtt->add(
term => '<:mate/d:' . $label,
pti => 32, # term-to-term relation
payload =>
- '<i>' . $source->pos # . # left part token position
+ '<i>' . $source->get_pos # . # left part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_term->tui # right part tui
);
@@ -139,7 +139,7 @@
'<i>' . $target->o_start . # end position
'<i>' . $target->o_end . # end position
'<i>' . $target->p_end . # right part end position
- '<i>' . $source->pos # . # left part token position
+ '<i>' . $source->get_pos # . # left part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_span->tui # right part tui
diff --git a/lib/KorAP/XML/Annotation/Mate/Morpho.pm b/lib/KorAP/XML/Annotation/Mate/Morpho.pm
index 7f21afa..0d1744b 100644
--- a/lib/KorAP/XML/Annotation/Mate/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/Mate/Morpho.pm
@@ -9,9 +9,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
@@ -19,30 +19,30 @@
foreach my $f (@{$content->{fs}->{f}}) {
- #pos
- if (($f->{-name} eq 'pos') &&
- ($found = $f->{'#text'})) {
- $mtt->add(term => 'mate/p:' . $found);
- }
+ #pos
+ if (($f->{-name} eq 'pos') &&
+ ($found = $f->{'#text'})) {
+ $mtt->add(term => 'mate/p:' . $found);
+ }
- # lemma
- elsif (($f->{-name} eq 'lemma')
- && ($found = $f->{'#text'})
- && $found ne '--') {
- # b($found)->decode('latin-1')->encode->to_string
- $mtt->add(term => 'mate/l:' . $found);
- }
+ # lemma
+ elsif (($f->{-name} eq 'lemma')
+ && ($found = $f->{'#text'})
+ && $found ne '--') {
+ # b($found)->decode('latin-1')->encode->to_string
+ $mtt->add(term => 'mate/l:' . $found);
+ }
- # MSD
- elsif (($f->{-name} eq 'msd') &&
- ($found = $f->{'#text'}) &&
- ($found ne '_')) {
- foreach (split '\|', $found) {
- my ($x, $y) = split "=", $_;
- # case, tense, number, mood, person, degree, gender
- $mtt->add(term => 'mate/m:' . $x . ($y ? ':' . $y : ''));
- };
- };
+ # MSD
+ elsif (($f->{-name} eq 'msd') &&
+ ($found = $f->{'#text'}) &&
+ ($found ne '_')) {
+ foreach (split '\|', $found) {
+ my ($x, $y) = split "=", $_;
+ # case, tense, number, mood, person, degree, gender
+ $mtt->add(term => 'mate/m:' . $x . ($y ? ':' . $y : ''));
+ };
+ };
};
}) or return;
@@ -50,7 +50,7 @@
};
sub layer_info {
- ['mate/l=tokens', 'mate/p=tokens', 'mate/m=tokens']
+ ['mate/l=tokens', 'mate/p=tokens', 'mate/m=tokens']
}
1;
diff --git a/lib/KorAP/XML/Annotation/Mate/MorphoAttr.pm b/lib/KorAP/XML/Annotation/Mate/MorphoAttr.pm
index ce140e7..67818f6 100644
--- a/lib/KorAP/XML/Annotation/Mate/MorphoAttr.pm
+++ b/lib/KorAP/XML/Annotation/Mate/MorphoAttr.pm
@@ -11,9 +11,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my ($found, $pos, $msd, $tui);
@@ -21,50 +21,50 @@
foreach my $f (@{$content->{fs}->{f}}) {
- #pos
- if (($f->{-name} eq 'pos') && ($found = $f->{'#text'})) {
- $pos = $found;
- }
+ #pos
+ if (($f->{-name} eq 'pos') && ($found = $f->{'#text'})) {
+ $pos = $found;
+ }
- # lemma
- elsif (($f->{-name} eq 'lemma')
- && ($found = $f->{'#text'})
- && $found ne '--') {
- $mtt->add(term => 'mate/l:' . $found);
- }
+ # lemma
+ elsif (($f->{-name} eq 'lemma')
+ && ($found = $f->{'#text'})
+ && $found ne '--') {
+ $mtt->add(term => 'mate/l:' . $found);
+ }
- # MSD
- elsif (($f->{-name} eq 'msd') &&
- ($found = $f->{'#text'}) &&
- ($found ne '_')) {
- $msd = $found;
- $tui = $mtt->id_counter;
- };
+ # MSD
+ elsif (($f->{-name} eq 'msd') &&
+ ($found = $f->{'#text'}) &&
+ ($found ne '_')) {
+ $msd = $found;
+ $tui = $mtt->id_counter;
+ };
};
my %term = (
- term => 'mate/p:' . $pos
+ term => 'mate/p:' . $pos
);
# There are attributes needed
if ($tui) {
- $term{pti} = 128;
- $term{payload} = '<s>' . $tui
- };;
+ $term{pti} = 128;
+ $term{payload} = '<s>' . $tui
+ };
$mtt->add(%term);
# MSD
if ($msd) {
- foreach (split '\|', $msd) {
- my ($x, $y) = split "=", $_;
- # case, tense, number, mood, person, degree, gender
- $mtt->add(
- term => '@:' . $x . ($y ? '=' . $y : ''),
- pti => 16,
- payload => '<s>' . $tui
- );
- };
+ foreach (split '\|', $msd) {
+ my ($x, $y) = split "=", $_;
+ # case, tense, number, mood, person, degree, gender
+ $mtt->add(
+ term => '@:' . $x . ($y ? '=' . $y : ''),
+ pti => 16,
+ payload => '<s>' . $tui
+ );
+ };
};
}) or return;
@@ -72,7 +72,7 @@
};
sub layer_info {
- ['mate/l=tokens', 'mate/p=tokens']
+ ['mate/l=tokens', 'mate/p=tokens']
};
1;
diff --git a/lib/KorAP/XML/Annotation/OpenNLP/Morpho.pm b/lib/KorAP/XML/Annotation/OpenNLP/Morpho.pm
index 3eb2e37..6b3175a 100644
--- a/lib/KorAP/XML/Annotation/OpenNLP/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/OpenNLP/Morpho.pm
@@ -8,18 +8,18 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f} or return;
+ my $content = $token->get_hash->{fs}->{f} or return;
$content = $content->{fs}->{f};
my $found;
# syntax
if (($content->{-name} eq 'pos') && ($content->{'#text'})) {
- $mtt->add(
- term => 'opennlp/p:' . $content->{'#text'}
- ) if $content->{'#text'};
+ $mtt->add(
+ term => 'opennlp/p:' . $content->{'#text'}
+ ) if $content->{'#text'};
};
}) or return;
@@ -27,7 +27,7 @@
};
sub layer_info {
- ['opennlp/p=tokens'];
+ ['opennlp/p=tokens'];
};
1;
diff --git a/lib/KorAP/XML/Annotation/OpenNLP/Sentences.pm b/lib/KorAP/XML/Annotation/OpenNLP/Sentences.pm
index b448817..954cda3 100644
--- a/lib/KorAP/XML/Annotation/OpenNLP/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/OpenNLP/Sentences.pm
@@ -10,14 +10,14 @@
layer => 'sentences',
cb => sub {
my ($stream, $span) = @_;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
$mtt->add(
- term => '<>:opennlp/s:s',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
- pti => 64,
- payload => '<b>0'
+ term => '<>:opennlp/s:s',
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
+ pti => 64,
+ payload => '<b>0'
);
$i++;
}
diff --git a/lib/KorAP/XML/Annotation/RWK/Morpho.pm b/lib/KorAP/XML/Annotation/RWK/Morpho.pm
index 517d0c6..8d9d822 100644
--- a/lib/KorAP/XML/Annotation/RWK/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/RWK/Morpho.pm
@@ -9,9 +9,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
diff --git a/lib/KorAP/XML/Annotation/RWK/Structure.pm b/lib/KorAP/XML/Annotation/RWK/Structure.pm
index 0d1bb2f..18edae7 100644
--- a/lib/KorAP/XML/Annotation/RWK/Structure.pm
+++ b/lib/KorAP/XML/Annotation/RWK/Structure.pm
@@ -19,7 +19,7 @@
my ($stream, $span) = @_;
# Read feature
- my $feature = $span->hash->{fs}->{f};
+ my $feature = $span->get_hash->{fs}->{f};
my $attrs;
# Get attributes
@@ -34,14 +34,14 @@
# Check only for anchors
if ($name eq 's-milestone') {
- push @{$milestones{s}}, [ $span->p_start, $span->o_start ];
+ push @{$milestones{s}}, [ $span->get_p_start, $span->get_o_start ];
}
elsif ($name eq 'p-milestone') {
- push @{$milestones{p}}, [ $span->p_start, $span->o_start ];
+ push @{$milestones{p}}, [ $span->get_p_start, $span->get_o_start ];
}
else {
- $last_p = $span->p_start;
- $last_o = $span->o_end;
+ $last_p = $span->get_p_start;
+ $last_o = $span->get_o_end;
}
}
) or return;
diff --git a/lib/KorAP/XML/Annotation/Sgbr/Lemma.pm b/lib/KorAP/XML/Annotation/Sgbr/Lemma.pm
index a8a169b..6cb6711 100644
--- a/lib/KorAP/XML/Annotation/Sgbr/Lemma.pm
+++ b/lib/KorAP/XML/Annotation/Sgbr/Lemma.pm
@@ -10,40 +10,40 @@
layer => 'lemma',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
my $capital = 0;
my $lemmata = (ref $content->{fs}->{f} eq 'ARRAY') ?
- $content->{fs}->{f} : [$content->{fs}->{f}];
+ $content->{fs}->{f} : [$content->{fs}->{f}];
my $first = 0;
# Iterate over all lemmata
foreach my $f (@$lemmata) {
- # lemma
- if (($f->{-name} eq 'lemma')
- && ($found = $f->{'#text'})) {
+ # lemma
+ if (($f->{-name} eq 'lemma')
+ && ($found = $f->{'#text'})) {
- # $found = b($found)->decode('latin-1')->encode->to_string;
- # warn $found;
+ # $found = b($found)->decode('latin-1')->encode->to_string;
+ # warn $found;
- unless ($first++) {
- $mtt->add(
- term => 'sgbr/l:' . $found
- );
- }
- else {
- $mtt->add(
- term => 'sgbr/lv:' . $found
- );
- };
- };
+ unless ($first++) {
+ $mtt->add(
+ term => 'sgbr/l:' . $found
+ );
+ }
+ else {
+ $mtt->add(
+ term => 'sgbr/lv:' . $found
+ );
+ };
+ };
};
}) or return;
diff --git a/lib/KorAP/XML/Annotation/Sgbr/Morpho.pm b/lib/KorAP/XML/Annotation/Sgbr/Morpho.pm
index 9eb261b..c3e35d7 100644
--- a/lib/KorAP/XML/Annotation/Sgbr/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/Sgbr/Morpho.pm
@@ -9,22 +9,22 @@
layer => 'ana',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
my $found;
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $pos = (ref $content->{fs}->{f} eq 'ARRAY') ?
- $content->{fs}->{f} : [$content->{fs}->{f}];
+ $content->{fs}->{f} : [$content->{fs}->{f}];
# Iterate over all lemmata
foreach my $f (@$pos) {
- # lemma
- if (($f->{-name} eq 'ctag')
- && ($found = $f->{'#text'})) {
- # b($found)->decode('latin-1')->encode->to_string
- $mtt->add(term => 'sgbr/p:' . $found);
- };
+ # lemma
+ if (($f->{-name} eq 'ctag')
+ && ($found = $f->{'#text'})) {
+ # b($found)->decode('latin-1')->encode->to_string
+ $mtt->add(term => 'sgbr/p:' . $found);
+ };
};
}) or return;
diff --git a/lib/KorAP/XML/Annotation/Talismane/Dependency.pm b/lib/KorAP/XML/Annotation/Talismane/Dependency.pm
index ea7bbf3..efd301e 100644
--- a/lib/KorAP/XML/Annotation/Talismane/Dependency.pm
+++ b/lib/KorAP/XML/Annotation/Talismane/Dependency.pm
@@ -14,10 +14,10 @@
my ($stream, $source, $tokens) = @_;
# Get MultiTermToken from stream for source
- my $mtt = $stream->pos($source->pos);
+ my $mtt = $stream->pos($source->get_pos);
# Serialized information from token
- my $content = $source->hash;
+ my $content = $source->get_hash;
# Get relation information
my $rel = $content->{rel};
@@ -40,18 +40,18 @@
term => '>:talismane/d:' . $label,
pti => 32, # term-to-term relation
payload =>
- '<i>' . $target->pos # . # right part token position
+ '<i>' . $target->get_pos # . # right part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_term->tui # right part tui
);
- my $target_mtt = $stream->pos($target->pos);
+ my $target_mtt = $stream->pos($target->get_pos);
$target_mtt->add(
term => '<:talismane/d:' . $label,
pti => 32, # term-to-term relation
payload =>
- '<i>' . $source->pos # . # left part token position
+ '<i>' . $source->get_pos # . # left part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_term->tui # right part tui
);
@@ -63,23 +63,23 @@
term => '>:talismane/d:' . $label,
pti => 33, # term-to-element relation
payload =>
- '<i>' . $target->o_start . # end position
- '<i>' . $target->o_end . # end position
- '<i>' . $target->p_start . # right part start position
- '<i>' . $target->p_end # . # right part end position
+ '<i>' . $target->get_o_start . # end position
+ '<i>' . $target->get_o_end . # end position
+ '<i>' . $target->get_p_start . # right part start position
+ '<i>' . $target->get_p_end # . # right part end position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_span->tui # right part tui
);
- my $target_mtt = $stream->pos($target->p_start);
+ my $target_mtt = $stream->pos($target->get_p_start);
$target_mtt->add(
term => '<:talismane/d:' . $label,
pti => 34, # element-to-term relation
payload =>
- '<i>' . $target->o_start . # end position
- '<i>' . $target->o_end . # end position
- '<i>' . $target->p_end . # right part end position
- '<i>' . $source->pos # . # left part token position
+ '<i>' . $target->get_o_start . # end position
+ '<i>' . $target->get_o_end . # end position
+ '<i>' . $target->get_p_end . # right part end position
+ '<i>' . $source->get_pos # . # left part token position
# '<s>0' . # $source_term->tui . # left part tui
# '<s>0' # . $target_span->tui # right part tui
diff --git a/lib/KorAP/XML/Annotation/Talismane/Morpho.pm b/lib/KorAP/XML/Annotation/Talismane/Morpho.pm
index 85a0786..bd5c30b 100644
--- a/lib/KorAP/XML/Annotation/Talismane/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/Talismane/Morpho.pm
@@ -9,9 +9,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
diff --git a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
index 54d2436..9215595 100644
--- a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
@@ -10,9 +10,9 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f};
my $found;
@@ -84,7 +84,7 @@
};
sub layer_info {
- ['tt/p=tokens', 'tt/l=tokens']
+ ['tt/p=tokens', 'tt/l=tokens']
};
1;
diff --git a/lib/KorAP/XML/Annotation/TreeTagger/Sentences.pm b/lib/KorAP/XML/Annotation/TreeTagger/Sentences.pm
index 721ea5c..567dace 100644
--- a/lib/KorAP/XML/Annotation/TreeTagger/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/TreeTagger/Sentences.pm
@@ -10,14 +10,14 @@
layer => 'sentences',
cb => sub {
my ($stream, $span) = @_;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
$mtt->add(
- term => '<>:tt/s:s',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
- pti => 64,
- payload => '<b>0' # Could be 2 as well t/p/s
+ term => '<>:tt/s:s',
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
+ pti => 64,
+ payload => '<b>0' # Could be 2 as well t/p/s
);
$i++;
}
@@ -29,7 +29,7 @@
};
sub layer_info {
- ['tt/s=spans'];
+ ['tt/s=spans'];
};
diff --git a/lib/KorAP/XML/Annotation/XIP/Constituency.pm b/lib/KorAP/XML/Annotation/XIP/Constituency.pm
index 7fd05f9..1637279 100644
--- a/lib/KorAP/XML/Annotation/XIP/Constituency.pm
+++ b/lib/KorAP/XML/Annotation/XIP/Constituency.pm
@@ -26,13 +26,13 @@
my ($stream, $span) = @_;
# Collect the span
- $xip_const{$span->id} = $span;
- # warn 'Remember ' . $span->id;
+ $xip_const{$span->get_id} = $span;
+ # warn 'Remember ' . $span->get_id;
# It's probably a root
- $xip_const_root->insert($span->id);
+ $xip_const_root->insert($span->get_id);
- my $rel = $span->hash->{rel} or return;
+ my $rel = $span->get_hash->{rel} or return;
$rel = [$rel] unless ref $rel eq 'ARRAY';
@@ -71,9 +71,9 @@
weaken $xip_const_noroot;
# Get the correct position for the span
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
- my $content = $span->hash;
+ my $content = $span->get_hash;
my $f = $content->{fs}->{f};
unless ($f->{-name} eq 'const') {
@@ -91,9 +91,9 @@
# $type is now NPA, NP, NUM ...
my %term = (
term => '<>:xip/c:' . $type,
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
pti => 64
);
@@ -133,9 +133,7 @@
my $subspan = delete $xip_const{$target};
# warn "A-Forgot about $target: " . ($subspan ? 'yes' : 'no');
- unless ($subspan) {
- next;
- };
+ next unless $subspan;
# warn "Span " . $target . " not found";
$this->($subspan, $level + 1);
@@ -151,9 +149,7 @@
# warn "B-Forgot about $_: " . ($obj ? 'yes' : 'no');
- unless ($obj) {
- next;
- };
+ next unless $obj;
$add_const->($obj, 0);
};
diff --git a/lib/KorAP/XML/Annotation/XIP/Morpho.pm b/lib/KorAP/XML/Annotation/XIP/Morpho.pm
index 1760d25..012207d 100644
--- a/lib/KorAP/XML/Annotation/XIP/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/XIP/Morpho.pm
@@ -10,48 +10,48 @@
encoding => 'xip',
cb => sub {
my ($stream, $token) = @_;
- my $mtt = $stream->pos($token->pos);
+ my $mtt = $stream->pos($token->get_pos);
- my $content = $token->hash->{fs}->{f}->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f}->{fs}->{f};
my $found;
my $capital = 0;
foreach (@$content) {
- # pos
- if (($_->{-name} eq 'pos') &&
- ($found = $_->{'#text'})) {
- $mtt->add(
- term => 'xip/p:' . $found
- );
+ # pos
+ if (($_->{-name} eq 'pos') &&
+ ($found = $_->{'#text'})) {
+ $mtt->add(
+ term => 'xip/p:' . $found
+ );
- $capital = 1 if $found eq 'NOUN';
- }
+ $capital = 1 if $found eq 'NOUN';
+ }
};
foreach (@$content) {
- # lemma
- if (($_->{-name} eq 'lemma') &&
- ($found = $_->{'#text'})) {
+ # lemma
+ if (($_->{-name} eq 'lemma') &&
+ ($found = $_->{'#text'})) {
- # Verb delimiter (aus=druecken)
- $mtt->add(term => 'xip/l:' . $found);
- if ($found =~ tr/=//d) {
- $mtt->add(term => 'xip/l:' . $found);
- };
+ # Verb delimiter (aus=druecken)
+ $mtt->add(term => 'xip/l:' . $found);
+ if ($found =~ tr/=//d) {
+ $mtt->add(term => 'xip/l:' . $found);
+ };
- # Composites
- my (@token) = split('#', $found);
+ # Composites
+ my (@token) = split('#', $found);
- next if @token == 1;
+ next if @token == 1;
- my $full = '';
- foreach (@token) {
- $full .= $_;
- $_ =~ s{/\w+$}{};
- $mtt->add(term => 'xip/l:#' . $_);
- };
- };
+ my $full = '';
+ foreach (@token) {
+ $full .= $_;
+ $_ =~ s{/\w+$}{};
+ $mtt->add(term => 'xip/l:#' . $_);
+ };
+ };
};
}) or return;
diff --git a/lib/KorAP/XML/Annotation/XIP/Sentences.pm b/lib/KorAP/XML/Annotation/XIP/Sentences.pm
index 56a0e53..03cb585 100644
--- a/lib/KorAP/XML/Annotation/XIP/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/XIP/Sentences.pm
@@ -13,14 +13,14 @@
cb => sub {
my ($stream, $span) = @_;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
$mtt->add(
- term => '<>:xip/s:s',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
- pti => 64,
- payload => '<b>0' # Could be 2 as well for t/p/s
+ term => '<>:xip/s:s',
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end,
+ pti => 64,
+ payload => '<b>0' # Could be 2 as well for t/p/s
);
$i++;
}
@@ -32,7 +32,7 @@
};
sub layer_info {
- ['xip/s=spans'];
+ ['xip/s=spans'];
};
diff --git a/lib/KorAP/XML/Index/MultiTermToken.pm b/lib/KorAP/XML/Index/MultiTermToken.pm
index 2815eff..cae6cba 100644
--- a/lib/KorAP/XML/Index/MultiTermToken.pm
+++ b/lib/KorAP/XML/Index/MultiTermToken.pm
@@ -64,11 +64,11 @@
};
sub surface {
- substr($_[0]->[0]->[0]->term,2);
+ substr($_[0]->[MT]->[0]->term,2);
};
sub lc_surface {
- substr($_[0]->[0]->[1]->term,2);
+ substr($_[0]->[MT]->[1]->term,2);
};
sub to_array {
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index 65daa94..c2d0c55 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -342,7 +342,7 @@
if ($cb) {
foreach (@$spanarray) {
- $cb->($self->stream, $_) if defined $_->p_start;
+ $cb->($self->stream, $_) if defined $_->get_p_start;
};
return 1;
};
@@ -398,7 +398,7 @@
if ($cb) {
foreach (@$tokenarray) {
# weaken $tokens;
- $cb->($self->stream, $_, $tokens) if defined $_->pos;
+ $cb->($self->stream, $_, $tokens) if defined $_->get_pos;
#, $tokens);
};
return 1;
@@ -771,12 +771,12 @@
layer => 'sentences',
cb => sub {
my ($stream, $span) = @_;
- my $mtt = $stream->pos($span->p_start);
+ my $mtt = $stream->pos($span->get_p_start);
$mtt->add(
term => '<>:s',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end
+ o_start => $span->get_o_start,
+ o_end => $span->get_o_end,
+ p_end => $span->get_p_end
);
}
);
diff --git a/lib/KorAP/XML/Tokenizer/Span.pm b/lib/KorAP/XML/Tokenizer/Span.pm
index 962d002..0162ac2 100644
--- a/lib/KorAP/XML/Tokenizer/Span.pm
+++ b/lib/KorAP/XML/Tokenizer/Span.pm
@@ -4,6 +4,19 @@
use Mojo::DOM;
use Clone;
+use constant {
+ O_START => 0,
+ O_END => 1,
+ P_START => 2,
+ P_END => 3,
+ ID => 4,
+ CONTENT => 5,
+ DOM => 6,
+ HASH => 7,
+ MILESTONE => 8,
+ PTI => 9
+};
+
sub new {
bless [], shift;
};
@@ -12,123 +25,103 @@
'span';
};
-sub o_start {
- if (defined $_[1]) {
- $_[0]->[0] = $_[1];
- };
- $_[0]->[0];
-};
-
sub set_o_start {
- $_[0]->[0] = $_[1];
+ $_[0]->[O_START] = $_[1];
};
-sub o_end {
- if (defined $_[1]) {
- $_[0]->[1] = $_[1];
- };
- $_[0]->[1];
+sub get_o_start {
+ $_[0]->[O_START]
};
sub set_o_end {
- $_[0]->[1] = $_[1];
+ $_[0]->[O_END] = $_[1];
};
-sub p_start {
- if (defined $_[1]) {
- $_[0]->[2] = $_[1];
- };
- $_[0]->[2];
+sub get_o_end {
+ $_[0]->[O_END]
};
sub set_p_start {
- $_[0]->[2] = $_[1];
+ $_[0]->[P_START] = $_[1];
};
-sub p_end {
- if (defined $_[1]) {
- $_[0]->[3] = $_[1];
- };
- $_[0]->[3];
+sub get_p_start {
+ $_[0]->[P_START]
};
sub set_p_end {
- $_[0]->[3] = $_[1];
+ $_[0]->[P_END] = $_[1];
};
-sub id {
- if (defined $_[1]) {
- $_[0]->[4] = $_[1];
- };
- $_[0]->[4];
+sub get_p_end {
+ $_[0]->[P_END];
};
-sub content {
- if (defined $_[1]) {
- $_[0]->[5] = $_[1];
- }
- else {
- return $_[0]->[5];
- };
+sub set_id {
+ $_[0]->[ID] = $_[1];
+};
+
+sub get_id {
+ $_[0]->[ID];
+};
+
+sub set_content {
+ $_[0]->[CONTENT] = $_[1];
+};
+
+sub get_content {
+ $_[0]->[CONTENT];
};
sub dom {
- if ($_[0]->[6]) {
- return $_[0]->[6];
+ if ($_[0]->[DOM]) {
+ return $_[0]->[DOM];
}
else {
- my $c = Mojo::DOM->new($_[0]->[5]);
+ my $c = Mojo::DOM->new($_[0]->[CONTENT]);
$c->xml(1);
- return $_[0]->[6] = $c;
+ return $_[0]->[DOM] = $c;
};
};
-sub hash {
- if (defined $_[1]) {
- $_[0]->[7] = $_[1];
- }
- else {
- return $_[0]->[7];
- };
+sub set_hash {
+ $_[0]->[HASH] = $_[1];
};
-
-sub milestone {
- if (defined $_[1]) {
- $_[0]->[8] = 1;
- };
- $_[0]->[8] ? 1 : 0;
+sub get_hash {
+ return $_[0]->[HASH];
};
-
-#sub tui {
-# if (defined $_[1]) {
-# $_[0]->[9] = $_[1];
-# };
-# $_[0]->[9];
-#};
-
-sub pti {
- if (defined $_[1]) {
- $_[0]->[10] = $_[1];
- };
- $_[0]->[10];
+sub set_milestone {
+ $_[0]->[MILESTONE] = 1;
};
+sub get_milestone {
+ $_[0]->[MILESTONE] ? 1 : 0;
+};
+
+sub set_pti {
+ $_[0]->[PTI] = $_[1];
+};
+
+sub get_pti {
+ $_[0]->[PTI];
+};
sub to_string {
my $v = shift;
{
no warnings;
- return '[(' . $v->[0] . ':' . $v->[1] . '|' .
- $v->[2] . ':' . $v->[3] . ')' .
- $v->[4] . '-' .$v->[5] . ']';
+ return '[(' . $v->[O_START] . ':' . $v->[O_END] . '|' .
+ $v->[P_START] . ':' . $v->[P_END] . ')' .
+ $v->[ID] . '-' .$v->[CONTENT] . ']';
};
};
-
# Clone the span
sub clone {
+ # TODO:
+ # Optionally clone without DOM and treat hash specially
return Clone::clone(shift);
};
diff --git a/lib/KorAP/XML/Tokenizer/Token.pm b/lib/KorAP/XML/Tokenizer/Token.pm
index 815f9e8..c3cabfa 100644
--- a/lib/KorAP/XML/Tokenizer/Token.pm
+++ b/lib/KorAP/XML/Tokenizer/Token.pm
@@ -3,6 +3,14 @@
use warnings;
use Mojo::DOM;
+use constant {
+ POS => 0,
+ CONTENT => 1,
+ ID => 2,
+ DOM => 3,
+ HASH => 4,
+};
+
sub new {
bless [], shift;
};
@@ -11,63 +19,56 @@
'token';
};
-# get or set token position
-sub pos {
- if (defined $_[1]) {
- $_[0]->[0] = $_[1];
- };
- $_[0]->[0];
+sub set_pos {
+ $_[0]->[POS] = $_[1];
};
-
-# Get or set token content
-sub content {
- if (defined $_[1]) {
- $_[0]->[1] = $_[1];
- }
- else {
- return $_[0]->[1];
- };
+sub get_pos {
+ $_[0]->[POS];
};
-
-# Get or set token id
-sub id {
- if ($_[1]) {
- $_[0]->[2] = $_[1];
- }
- else {
- $_[0]->[2];
- };
+sub set_content {
+ $_[0]->[CONTENT] = $_[1];
};
+sub get_content {
+ $_[0]->[CONTENT];
+};
+
+sub set_id {
+ $_[0]->[ID] = $_[1];
+};
+
+sub get_id {
+ $_[0]->[ID];
+};
sub dom {
- if ($_[0]->[3]) {
- return $_[0]->[3];
+ if ($_[0]->[DOM]) {
+ return $_[0]->[DOM];
}
else {
- my $c = Mojo::DOM->new($_[0]->[1]);
+ my $c = Mojo::DOM->new($_[0]->[CONTENT]);
$c->xml(1);
- return $_[0]->[3] = $c;
+ return $_[0]->[DOM] = $c;
};
};
-sub hash {
- if (defined $_[1]) {
- $_[0]->[4] = $_[1];
- }
- else {
- return $_[0]->[4];
- };
+sub set_hash {
+ return $_[0]->[HASH] = $_[1];
};
+sub get_hash {
+ return $_[0]->[HASH];
+};
+
+
sub to_string {
my $v = shift;
{
no warnings;
- return '[(' . $v->[0] . ')' .
- $v->[1] . '-' .$v->[2] . ']';
+ return '[(' . $v->[POS] . ')' .
+ $v->[CONTENT] . '-' . $v->[ID] . ']';
};
};
diff --git a/lib/KorAP/XML/Tokenizer/Units.pm b/lib/KorAP/XML/Tokenizer/Units.pm
index 7d31626..df22ab0 100644
--- a/lib/KorAP/XML/Tokenizer/Units.pm
+++ b/lib/KorAP/XML/Tokenizer/Units.pm
@@ -32,21 +32,21 @@
# The span is a milestone
if ($from == $to) {
- $span->milestone(1);
+ $span->set_milestone(1);
};
# The span has an id (probably useful)
- $span->id($s->{-id}) if $s && $s->{-id};
+ $span->set_id($s->{-id}) if $s && $s->{-id};
# Set character offsets
- $span->o_start($from);
- $span->o_end($to);
+ $span->set_o_start($from);
+ $span->set_o_end($to);
# Get start position (exactly)
- my $start = $self->match->startswith($span->o_start);
+ my $start = $self->match->startswith($from);
unless (defined $start) {
- $start = $self->range->after($span->o_start);
+ $start = $self->range->after($from);
unless (defined $start) {
if (DEBUG) {
@@ -57,21 +57,21 @@
};
# Set start token position to span
- $span->p_start($start);
+ $span->set_p_start($start);
- if ($span->milestone) {
- $span->p_end($start);
+ if ($span->get_milestone) {
+ $span->set_p_end($start);
}
else {
# Get end position (exactly)
- my $end = $self->match->endswith($span->o_end);
+ my $end = $self->match->endswith($span->get_o_end);
unless (defined $end) {
- $end = $self->range->before($span->o_end);
+ $end = $self->range->before($span->get_o_end);
- if (DEBUG && $span->o_end == 196) {
- warn 'SPAN ends at ' . $span->o_end . ' and has ' . $end;
+ if (DEBUG && $span->get_o_end == 196) {
+ warn 'SPAN ends at ' . $span->get_o_end . ' and has ' . $end;
};
unless (defined $end) {
@@ -98,21 +98,21 @@
# return unless $span->p_end >= $span->p_start;
# EXPERIMENTAL:
- unless ($end >= $span->p_start) {
+ unless ($end >= $span->get_p_start) {
if (DEBUG) {
- warn 'Ignore ' . $span->id . ' with ' . $span->p_start . '-' . $end;
+ warn 'Ignore ' . $span->get_id . ' with ' . $span->get_p_start . '-' . $end;
};
return;
};
- $span->p_end($end + 1);
+ $span->set_p_end($end + 1);
}
if (DEBUG && $from == 124) {
- warn 'exact: ' . $span->p_start . '-' . $span->p_end;
+ warn 'exact: ' . $span->get_p_start . '-' . $span->get_p_end;
};
- $span->hash($s) if $s;
+ $span->set_hash($s) if $s;
$span;
};
@@ -132,10 +132,10 @@
return unless defined $pos;
my $token = KorAP::XML::Tokenizer::Token->new;
- $token->id($s->{-id}) if $s && $s->{-id};
- $token->pos($pos);
+ $token->set_id($s->{-id}) if $s && $s->{-id};
+ $token->set_pos($pos);
- $token->hash($s) if $s;
+ $token->set_hash($s) if $s;
$token;
};