Remove MultiTermToken->add() in favor of MultiTermToken->add_by_term()
Change-Id: I1df0ed2a545ded204bafad7ca01bec0ce54a94fc
diff --git a/Changes b/Changes
index 1f88c45..eee7df7 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.41 2020-08-04
+0.41 2020-08-05
- Added support for RWK annotations.
- Improved DGD support.
- Fixed bug in RWK support that broke on
@@ -6,6 +6,8 @@
- Separate "real data" test suite from artificial
tests to prepare for CPAN release.
- Optimizations and cleanup based on profiling.
+ - Remove MultiTermToken->add() in favor of
+ MultiTermToken->add_by_term().
0.40 2020-03-03
- Fixed XIP parser.
diff --git a/lib/KorAP/XML/Index/MultiTerm.pm b/lib/KorAP/XML/Index/MultiTerm.pm
index 0111bee..db75a38 100644
--- a/lib/KorAP/XML/Index/MultiTerm.pm
+++ b/lib/KorAP/XML/Index/MultiTerm.pm
@@ -18,51 +18,12 @@
PAYLOAD => 8,
};
+
+# Construct a multiterm object by passing a term
sub new {
- my $self = bless [], shift;
-
- # TODO:
- # Deprecate!
- for (my $i = 0; $i < scalar @_; $i+=2) {
- if ($_[$i] eq 'term') {
- $self->[TERM] = $_[$i+1];
- }
- elsif ($_[$i] eq 'p_start') {
- $self->[P_START] = $_[$i+1];
- }
- elsif ($_[$i] eq 'p_end') {
- $self->[P_END] = $_[$i+1];
- }
- elsif ($_[$i] eq 'payload') {
- $self->[PAYLOAD] = $_[$i+1];
- }
- elsif ($_[$i] eq 'store_offsets') {
- $self->store_offsets($_[$i+1]);
- }
- elsif ($_[$i] eq 'o_start') {
- $self->[O_START] = $_[$i+1];
- }
- elsif ($_[$i] eq 'o_end') {
- $self->[O_END] = $_[$i+1];
- }
- elsif ($_[$i] eq 'pti') {
- $self->[PTI] = $_[$i+1];
- }
- elsif ($_[$i] eq 'tui') {
- $self->[TUI] = $_[$i+1];
- };
- };
- $self;
-};
-
-sub new_from_term {
bless [$_[1]], $_[0];
};
-sub new_blank {
- bless [], shift;
-}
-
sub set_payload {
return $_[0]->[PAYLOAD] = $_[1];
};
diff --git a/lib/KorAP/XML/Index/MultiTermToken.pm b/lib/KorAP/XML/Index/MultiTermToken.pm
index a8085d5..35f7d7a 100644
--- a/lib/KorAP/XML/Index/MultiTermToken.pm
+++ b/lib/KorAP/XML/Index/MultiTermToken.pm
@@ -21,29 +21,8 @@
bless [[]], shift;
};
-
-sub add {
- my $self = shift;
-
- my $mt;
- unless (blessed $_[0]) {
- if (@_ == 1) {
- $mt = KorAP::XML::Index::MultiTerm->new_blank;
- $mt->set_term($_[0]);
- }
- else {
- $mt = KorAP::XML::Index::MultiTerm->new(@_);
- };
- }
- else {
- $mt = $_[0];
- };
- push(@{$self->[MT]}, $mt);
- $mt;
-};
-
sub add_by_term {
- my $mt = KorAP::XML::Index::MultiTerm->new_from_term($_[1]);
+ my $mt = KorAP::XML::Index::MultiTerm->new($_[1]);
push(@{$_[0]->[MT]}, $mt);
$mt;
};
diff --git a/lib/KorAP/XML/Index/MultiTermTokenStream.pm b/lib/KorAP/XML/Index/MultiTermTokenStream.pm
index 0ec97c7..c34b63d 100644
--- a/lib/KorAP/XML/Index/MultiTermTokenStream.pm
+++ b/lib/KorAP/XML/Index/MultiTermTokenStream.pm
@@ -49,24 +49,22 @@
my $tui = $self->tui($unit->get_p_start);
- return $mtt->add(
- term => '<>:' . $term,
- o_start => $unit->get_o_start,
- o_end => $unit->get_o_end,
- p_start => $unit->get_p_start,
- p_end => $unit->get_p_end,
- pti => 64,
- payload => '<b>0<s>' . $tui,
- tui => $tui
- );
-
+ my $mt = $mtt->add_by_term('<>:' . $term);
+ $mt->set_o_start($unit->get_o_start);
+ $mt->set_o_end($unit->get_o_end);
+ $mt->set_p_start($unit->get_p_start);
+ $mt->set_p_end($unit->get_p_end);
+ $mt->set_pti(64);
+ $mt->set_payload('<b>0<s>' . $tui);
+ $mt->set_tui($tui);
+ return $mt;
};
};
sub add_meta {
my $self = shift;
my $pos_0 = $self->pos(0) or return;
- my $mt = $pos_0->add('-:' . shift);
+ my $mt = $pos_0->add_by_term('-:' . shift);
$mt->set_payload(shift);
$mt->set_stored_offsets(0);
};
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index ad38f20..6b5176a 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -236,6 +236,8 @@
my $self = shift;
my $mtts = $self->stream or return;
+ my $mt;
+
foreach my $mtt (@{$mtts->multi_term_tokens}) {
my $o_start = $mtt->o_start;
my $o_end = $mtt->o_end;
@@ -254,29 +256,23 @@
while ($s =~ /(a+)[^a]/g) {
my $from = $-[1];
my $to = $+[1];
- $mtt->add(
- term => 'i^1:' . substr($os, $from, $from + $to),
- o_start => $from + $o_start,
- o_end => $to + $o_start
- ) unless $to - $from == $l;
+ $mt = $mtt->add_by_term('i^1:' . substr($os, $from, $from + $to));
+ $mt->set_o_start($from + $o_start);
+ $mt->set_o_end($to + $o_start) unless $to - $from == $l;
};
while ($s =~ /(0+)[^0]/g) {
my $from = $-[1];
my $to = $+[1];
- $mtt->add(
- term => 'i^2:' . substr($os, $from, $from + $to),
- o_start => $from + $o_start,
- o_end => $to + $o_start
- ) unless $to - $from == $l;
+ $mt = $mtt->add_by_term('i^2:' . substr($os, $from, $from + $to));
+ $mt->set_o_start($from + $o_start);
+ $mt->set_o_end($to + $o_start) unless $to - $from == $l;
};
while ($s =~ /(#)/g) {
my $from = $-[1];
my $to = $+[1];
- $mtt->add(
- term => 'i^3:' . substr($os, $from, $from + $to),
- o_start => $from + $o_start,
- o_end => $to + $o_start
- ) unless $to - $from == $l;
+ $mt = $mtt->add_by_term('i^3:' . substr($os, $from, $from + $to));
+ $mt->set_o_start($from + $o_start);
+ $mt->set_o_end($to + $o_start) unless $to - $from == $l;
};
};
@@ -770,12 +766,10 @@
cb => sub {
my ($stream, $span) = @_;
my $mtt = $stream->pos($span->get_p_start);
- $mtt->add(
- term => '<>:s',
- o_start => $span->get_o_start,
- o_end => $span->get_o_end,
- p_end => $span->get_p_end
- );
+ my $mt = $mtt->add_by_term('<>:s');
+ $mt->set_o_start($span->get_o_start);
+ $mt->set_o_end($span->get_o_end);
+ $mt->set_p_end($span->get_p_end);
}
);
@@ -803,9 +797,7 @@
# syntax
if ((my $found = $content->at('f[name="pos"]')) && ($found = $found->text)) {
- $mtt->add(
- term => 'cnx_syn:' . $found
- );
+ $mtt->add_by_term('cnx_syn:' . $found);
};
});
diff --git a/t/sort_tokens.t b/t/sort_tokens.t
index 0db2c59..d3de475 100644
--- a/t/sort_tokens.t
+++ b/t/sort_tokens.t
@@ -11,50 +11,50 @@
ok(my $mtt = KorAP::XML::Index::MultiTermToken->new, 'New token');
ok(defined $mtt->set_o_start(0), 'Set start character offset');
ok($mtt->set_o_end(5), 'Set end character offset');
-ok($mtt->add(term => '@:k=N',
- pti => 128,
- payload =>'<s>9'), 'Add token');
-ok($mtt->add(term => 'a=N',
- pti => 129,
- payload =>'<b>144'), 'Add token');
-ok($mtt->add(term => '<>:b=N',
- pti => 64,
- o_start => 0,
- o_end => 5,
- p_end => 5), 'Add token');
-ok($mtt->add(term => 'c=N',
- pti => 129,
- payload => '<b>144'), 'Add token');
-ok($mtt->add(term => '<>:d=N',
- pti => 64,
- o_start => 0,
- o_end => 5,
- p_end => 6,
- payload => '<b>7'), 'Add token');
-ok($mtt->add(term => '@:j=N',
- pti => 16,
- payload =>'<s>8'), 'Add token');
-ok($mtt->add(term => '<>:e=ADJ',
- pti => 64,
- o_start => 0,
- o_end => 5,
- p_end => 6,
- payload => '<b>6'), 'Add token');
-ok($mtt->add(term => '<>:f=N',
- pti => 64,
- o_start => 0,
- o_end => 5,
- p_end => 6,
- payload => '<b>5<b>122'), 'Add token');
-ok($mtt->add(term => 'g=N',
- pti => 129,
- payload =>'<b>144'), 'Add token');
-ok($mtt->add(term => '@:h=N',
- pti => 16,
- payload =>'<s>5'), 'Add token');
-ok($mtt->add(term => '@:i=N',
- pti => 16,
- payload =>'<s>3'), 'Add token');
+ok(my $mt = $mtt->add_by_term('@:k=N'), 'Add token');
+$mt->set_pti(128);
+$mt->set_payload('<s>9');
+ok($mt = $mtt->add_by_term('a=N'), 'Add token');
+$mt->set_pti(129);
+$mt->set_payload('<b>144');
+ok($mt = $mtt->add_by_term('<>:b=N'), 'Add token');
+$mt->set_pti(64);
+$mt->set_o_start(0);
+$mt->set_o_end(5);
+$mt->set_p_end(5);
+ok($mt = $mtt->add_by_term('c=N'), 'Add token');
+$mt->set_pti(129);
+$mt->set_payload('<b>144');
+ok($mt = $mtt->add_by_term('<>:d=N'), 'Add token');
+$mt->set_pti(64);
+$mt->set_o_start(0);
+$mt->set_o_end(5);
+$mt->set_p_end(6);
+$mt->set_payload('<b>7');
+ok($mt = $mtt->add_by_term('@:j=N'), 'Add token');
+$mt->set_pti(16);
+$mt->set_payload('<s>8');
+ok($mt = $mtt->add_by_term('<>:e=ADJ'), 'Add token');
+$mt->set_pti(64);
+$mt->set_o_start(0);
+$mt->set_o_end(5);
+$mt->set_p_end(6);
+$mt->set_payload('<b>6');
+ok($mt = $mtt->add_by_term('<>:f=N'), 'Add token');
+$mt->set_pti(64);
+$mt->set_o_start(0);
+$mt->set_o_end(5);
+$mt->set_p_end(6);
+$mt->set_payload('<b>5<b>122');
+ok($mt = $mtt->add_by_term('g=N'), 'Add token');
+$mt->set_pti(129);
+$mt->set_payload('<b>144');
+ok($mt = $mtt->add_by_term('@:h=N'), 'Add token');
+$mt->set_pti(16);
+$mt->set_payload('<s>5');
+ok($mt = $mtt->add_by_term('@:i=N'), 'Add token');
+$mt->set_pti(16);
+$mt->set_payload('<s>3');
is($mtt->to_string,
'[(0-5)<>:b=N$<b>64<i>0<i>5<i>5|' .
@@ -75,60 +75,68 @@
ok($mtt->set_o_end(5), 'Set end character offset');
# 2-7 to 2-4
-ok($mtt->add(term => '<:child-of',
- pti => 35,
- payload => '<i>0<i>0<i>0<i>0'. # character os
- '<i>7<i>2<i>4<s>5<s>4<s>3'
- ), 'New rel');
+ok($mt = $mtt->add_by_term('<:child-of'), 'New rel');
+$mt->set_pti(35);
+$mt->set_payload('<i>0<i>0<i>0<i>0'. # character os
+ '<i>7<i>2<i>4<s>5<s>4<s>3'
+ );
# 2-4 to 3
-ok($mtt->add(term => '<:child-of',
- pti => 34,
- payload => '<i>0<i>0' . # character os
- '<i>4<i>3<s>3<s>3<s>1'
- ), 'New rel');
+ok($mt = $mtt->add_by_term('<:child-of'), 'New rel');
+$mt->set_pti(34);
+$mt->set_payload(
+ '<i>0<i>0' . # character os
+ '<i>4<i>3<s>3<s>3<s>1'
+ );
# 2 to 2-4
# <i>startright<i>endright<s>relation-id<s>left-id<s>right-id
-ok($mtt->add(term => '>:child-of',
- pti => 33,
- payload => '<i>0<i>0'. # character os
- '<i>2<i>4<s>2<s>1<s>3'
- ), 'New rel');
+ok($mt = $mtt->add_by_term('>:child-of'), 'New rel');
+$mt->set_pti(33);
+$mt->set_payload(
+ '<i>0<i>0'. # character os
+ '<i>2<i>4<s>2<s>1<s>3'
+ );
# 2-4 to 2-7
-ok($mtt->add(term => '>:child-of',
- pti => 35,
- payload => '<i>0<i>0<i>0<i>0' . # character os
- '<i>4<i>2<i>7<s>1<s>3<s>4'
- ), 'New rel');
+ok($mt = $mtt->add_by_term('>:child-of'), 'New rel');
+$mt->set_pti(35);
+$mt->set_payload(
+ '<i>0<i>0<i>0<i>0' . # character os
+ '<i>4<i>2<i>7<s>1<s>3<s>4'
+ );
# 2-4 to 4
-ok($mtt->add(term => '<:child-of',
- pti => 34,
- payload => '<i>0<i>0' . # character os
- '<i>4<i>4<s>4<s>3<s>1'), 'New rel');
-
+ok($mt = $mtt->add_by_term('<:child-of'), 'New rel');
+$mt->set_pti(34);
+$mt->set_payload(
+ '<i>0<i>0' . # character os
+ '<i>4<i>4<s>4<s>3<s>1'
+ );
# 2-7 to 1-7
-ok($mtt->add(term => '>:child-of',
- pti => 35,
- payload => '<i>0<i>0<i>0<i>0' . # character os
- '<i>7<i>1<i>7<s>2<s>4<s>2'), 'New rel');
+ok($mt = $mtt->add_by_term('>:child-of'), 'New rel');
+$mt->set_pti(35);
+$mt->set_payload(
+ '<i>0<i>0<i>0<i>0' . # character os
+ '<i>7<i>1<i>7<s>2<s>4<s>2'
+ );
# 2-7 to 4-7
-ok($mtt->add(term => '<:child-of',
- pti => 35,
- payload => '<i>0<i>0<i>0<i>0' . # character os
- '<i>7<i>4<i>7<s>6<s>4<s>2'), 'New rel');
+ok($mt = $mtt->add_by_term('<:child-of'), 'New rel');
+$mt->set_pti(35);
+$mt->set_payload(
+ '<i>0<i>0<i>0<i>0' . # character os
+ '<i>7<i>4<i>7<s>6<s>4<s>2'
+ );
# 2 to 3
-ok($mtt->add(term => '>:child-of',
- pti => 32,
- payload => '<i>3<s>2<s>4<s>2'
- ), 'New rel');
+ok($mt = $mtt->add_by_term('>:child-of'), 'New rel');
+$mt->set_pti(32);
+$mt->set_payload('<i>3<s>2<s>4<s>2');
+
#NOTE: Sorting of the candidate spans can alternatively be done in
# * indexing, instead of here. (first by left positions and then by
diff --git a/t/tokens.t b/t/tokens.t
index 9ea5bcd..319a02e 100644
--- a/t/tokens.t
+++ b/t/tokens.t
@@ -7,14 +7,12 @@
use_ok('KorAP::XML::Index::MultiTerm');
-ok(my $term = KorAP::XML::Index::MultiTerm->new(
- term => 'Baum',
- p_start => 0,
- p_end => 56,
- payload => '<i>56',
- o_start => 34,
- o_end => 120
-), 'Create new object');
+ok(my $term = KorAP::XML::Index::MultiTerm->new('Baum'), 'Create new object');
+$term->set_p_start(0);
+$term->set_p_end(56);
+$term->set_payload('<i>56');
+$term->set_o_start(34);
+$term->set_o_end(120);
is($term->get_term, 'Baum');
is($term->get_p_start, 0);
@@ -24,9 +22,7 @@
is($term->get_payload, '<i>56');
is($term->to_string, 'Baum$<i>34<i>120<i>56<i>56');
-ok($term = KorAP::XML::Index::MultiTerm->new(
- term => 'Baum'
-), 'Create new object');
+ok($term = KorAP::XML::Index::MultiTerm->new('Baum'), 'Create new object');
is($term->get_term, 'Baum');
is($term->get_p_start, 0);
@@ -36,9 +32,7 @@
is($term->get_payload, undef);
is($term->to_string, 'Baum');
-ok($term = KorAP::XML::Index::MultiTerm->new(
- term => 'Ba#um'
-), 'Create new object');
+ok($term = KorAP::XML::Index::MultiTerm->new('Ba#um'), 'Create new object');
is($term->get_term, 'Ba#um');
is($term->get_p_start, 0);
@@ -48,10 +42,8 @@
is($term->get_payload, undef);
is($term->to_string, 'Ba\#um');
-ok($term = KorAP::XML::Index::MultiTerm->new(
- term => 'Ba#u$m',
- payload => '<i>45'
-), 'Create new object');
+ok($term = KorAP::XML::Index::MultiTerm->new('Ba#u$m'), 'Create new object');
+$term->set_payload('<i>45');
is($term->get_term, 'Ba#u$m');
is($term->get_p_start, 0);