Partial fix of dependencies for sorting
Change-Id: I3efc663b82f3d8174fde65e48c7cce58a4831558
diff --git a/lib/KorAP/XML/Field/MultiTermToken.pm b/lib/KorAP/XML/Field/MultiTermToken.pm
index ad14402..98fcd8a 100644
--- a/lib/KorAP/XML/Field/MultiTermToken.pm
+++ b/lib/KorAP/XML/Field/MultiTermToken.pm
@@ -88,26 +88,32 @@
};
# Get relation based positions
-# TODO: Fix!
sub _rel_right_pos {
+ # Both are either < or >
- # There are relation ids!
-
- # token to token - right token
- if ($_[0] =~ m/^<i>(\d+)<s>/o) {
+ # term to term - right token
+ if ($_[1] =~ m/^<i>(\d+)(?:<s>|$)/o) {
return ($1, $1);
}
- # token/span to span - right token (including character offsets)
- elsif ($_[0] =~ m/^<i>\d+<i>\d+<i>(\d+)<i>(\d+)<s>/o) {
+ # term to span - right token
+ # (including character offsets)
+ elsif ($_[0] == 33 && $_[1] =~ m/^(?:<i>\d+){2}<i>(\d+)<i>(\d+)<s>/o) {
return ($1, $2);
}
- # span to token - right token
- elsif ($_[0] =~ m/^<i>(\d+)<s>/o) {
+ # span to term
+ elsif ($_[0] == 34 && $_[1] =~ m/^(?:<i>\d+){3}<i>(\d+)<s>/o) {
return ($1, $1);
+ }
+
+ # span-to-span
+ elsif ($_[0] == 35 && $_[1] =~ m/^(?:<i>\d+){4}<i>(\d+)<i>(\d+)<s>/o) {
+ return ($1, $2);
};
- carp 'Unknown relation format! ' . $_[0];
+
+ # No known relation format matched - warn and fall back to (0,0)
+ carp 'Unknown relation format! ' . $_[1];
return (0,0);
};
@@ -150,9 +156,13 @@
return 1;
}
else {
+ # Both are either > or <
+
+ warn $a->to_string;
+
# Check for right positions
- (my $a_start, $a_end) = _rel_right_pos($a->[0]);
- (my $b_start, $b_end) = _rel_right_pos($b->[0]);
+ (my $a_start, $a_end) = _rel_right_pos($a->pti, $a->[0]);
+ (my $b_start, $b_end) = _rel_right_pos($b->pti, $b->[0]);
if ($a_start < $b_start) {
return -1;
}
diff --git a/lib/KorAP/XML/Index/Mate/Dependency.pm b/lib/KorAP/XML/Index/Mate/Dependency.pm
index 0d19cdd..86b3463 100644
--- a/lib/KorAP/XML/Index/Mate/Dependency.pm
+++ b/lib/KorAP/XML/Index/Mate/Dependency.pm
@@ -136,9 +136,12 @@
term => '<:mate/d:' . $label,
pti => 34, # element-to-term relation
payload =>
- '<i>' . $source->pos . # left part token position
- '<s>0' . # $source_term->tui . # left part tui
- '<s>0' # . $target_span->tui # right part tui
+ '<i>' . $target->o_start . # right part start character offset
+ '<i>' . $target->o_end . # right part end character offset
+ '<i>' . $target->p_end . # right part end position
+ '<i>' . $source->pos . # left part token position
+ '<s>0' . # $source_term->tui . # left part tui
+ '<s>0' # . $target_span->tui # right part tui
);
}
diff --git a/t/real/wpd.t b/t/real/wpd.t
index 3dcea9d..efbc89b 100644
--- a/t/real/wpd.t
+++ b/t/real/wpd.t
@@ -72,9 +72,9 @@
my $stream = $tokens->to_data->{data}->{stream};
-is($stream->[77]->[0], '<:mate/d:--$<b>34<i>498<i>499<i>78<i>78<s>0<s>0', 'element to term');
+is($stream->[77]->[0], '<:mate/d:--$<b>34<i>78<s>0<s>0', 'element to term');
#is($stream->[77]->[1], '<>:mate/d:&&&$<b>64<i>498<i>499<i>78<b>0<s>1', 'element to term');
-is($stream->[78]->[0], '>:mate/d:--$<b>33<i>77<i>78<s>0<s>0', 'term to element');
+is($stream->[78]->[0], '>:mate/d:--$<b>33<i>498<i>499<i>77<i>78<s>0<s>0', 'term to element');
# is($stream->[78]->[3], 'mate/d:&&&$<b>128<s>1', 'Node');
diff --git a/t/sort_tokens.t b/t/sort_tokens.t
index fb9ef7d..d777cfd 100644
--- a/t/sort_tokens.t
+++ b/t/sort_tokens.t
@@ -12,67 +12,101 @@
ok(defined $mtt->o_start(0), 'Set start character offset');
ok($mtt->o_end(5), 'Set end character offset');
ok($mtt->add(term => '@:k=N',
+ pti => 128,
payload =>'<s>9'), 'Add token');
ok($mtt->add(term => 'a=N',
+ pti => 129,
payload =>'<b>144'), 'Add token');
ok($mtt->add(term => '<>:b=N',
+ pti => 64,
o_start => 0,
o_end => 5,
p_end => 5), 'Add token');
-ok($mtt->add(term => 'c=N', payload => '<b>144'), 'Add token');
+ok($mtt->add(term => 'c=N',
+ pti => 129,
+ payload => '<b>144'), 'Add token');
ok($mtt->add(term => '<>:d=N',
+ pti => 64,
o_start => 0,
o_end => 5,
p_end => 6,
payload => '<b>7'), 'Add token');
ok($mtt->add(term => '@:j=N',
+ pti => 16,
payload =>'<s>8'), 'Add token');
ok($mtt->add(term => '<>:e=ADJ',
+ pti => 64,
o_start => 0,
o_end => 5,
p_end => 6,
payload => '<b>6'), 'Add token');
ok($mtt->add(term => '<>:f=N',
+ pti => 64,
o_start => 0,
o_end => 5,
p_end => 6,
payload => '<b>5<b>122'), 'Add token');
ok($mtt->add(term => 'g=N',
+ pti => 129,
payload =>'<b>144'), 'Add token');
ok($mtt->add(term => '@:h=N',
+ pti => 16,
payload =>'<s>5'), 'Add token');
ok($mtt->add(term => '@:i=N',
+ pti => 16,
payload =>'<s>3'), 'Add token');
is($mtt->to_string,
- '[(0-5)<>:b=N$<i>0<i>5<i>5|'.
- '<>:e=ADJ$<i>0<i>5<i>6<b>6|'.
- '<>:d=N$<i>0<i>5<i>6<b>7|'.
- '<>:f=N$<i>0<i>5<i>6<b>5<b>122|'.
- '@:i=N$<s>3|'.
- '@:h=N$<s>5|'.
- '@:j=N$<s>8|'.
- '@:k=N$<s>9|'.
- 'a=N$<b>144|'.
- 'c=N$<b>144|'.
- 'g=N$<b>144]', 'Check string');
+ '[(0-5)<>:b=N$<b>64<i>0<i>5<i>5|' .
+ '<>:e=ADJ$<b>64<i>0<i>5<i>6<b>6|' .
+ '<>:d=N$<b>64<i>0<i>5<i>6<b>7|' .
+ '<>:f=N$<b>64<i>0<i>5<i>6<b>5<b>122|' .
+ '@:i=N$<b>16<s>3|' .
+ '@:h=N$<b>16<s>5|' .
+ '@:j=N$<b>16<s>8|' .
+ '@:k=N$<b>128<s>9|' .
+ 'a=N$<b>129<b>144|' .
+ 'c=N$<b>129<b>144|' .
+ 'g=N$<b>129<b>144]',
+ 'Check string');
ok($mtt = KorAP::XML::Field::MultiTermToken->new, 'New token');
ok(defined $mtt->o_start(0), 'Set start character offset');
ok($mtt->o_end(5), 'Set end character offset');
# 2-7 to 2-4
-ok($mtt->add(term => '<:child-of', p_end => 7, payload => '<i>2<i>4<s>5<s>4<s>3'), 'New rel');
+ok($mtt->add(term => '<:child-of',
+ pti => 35,
+ payload => '<i>0<i>0<i>0<i>0'. # character os
+ '<i>7<i>2<i>4<s>0<s>0'
+ ), 'New rel');
# 2-4 to 3
-ok($mtt->add(term => '<:child-of', p_end => 4, payload => '<b>0<i>3<s>3<s>3<s>1'), 'New rel');
+ok($mtt->add(term => '<:child-of',
+ p_end => 4,
+ pti => 34,
+ payload => '<i>0<i>0' . # character os
+ '<i>4<i>3<s>0<s>0<s>0'
+ ), 'New rel');
# 2 to 2-4
# <i>startright<i>endright<s>relation-id<s>left-id<s>right-id
-ok($mtt->add(term => '>:child-of', payload => '<i>2<i>4<s>2<s>1<s>3'), 'New rel');
+ok($mtt->add(term => '>:child-of',
+ pti => 33,
+ payload => '<i>0<i>0'. # character os
+ '<i>2<i>4<s>0<s>0<s>0'
+ ), 'New rel');
# 2-4 to 2-7
-ok($mtt->add(term => '>:child-of', p_end => 4, payload => '<i>2<i>7<s>1<s>3<s>4'), 'New rel');
+ok($mtt->add(term => '>:child-of',
+ pti => 35,
+ payload => '<i>0<i>0<i>0<i>0' . # character os
+ '<i>4<i>2<i>7<s>1<s>3<s>4'
+ ), 'New rel');
+
+done_testing;
+__END__
+
# 2-4 t0 4
ok($mtt->add(term => '<:child-of', p_end => 4, payload => '<b>0<i>4<s>4<s>3<s>1'), 'New rel');