Fixed relying on p_end in relation tokens
Change-Id: I5d82f8fe14a358aa7decb0a91dddc7aaa1a42c2e
diff --git a/lib/KorAP/XML/Field/MultiTerm.pm b/lib/KorAP/XML/Field/MultiTerm.pm
index 7ba3f7e..43e0049 100644
--- a/lib/KorAP/XML/Field/MultiTerm.pm
+++ b/lib/KorAP/XML/Field/MultiTerm.pm
@@ -3,6 +3,9 @@
use warnings;
use MIME::Base64;
+# Todo: This should store only the pti and the payload - with clever access using the pti!
+# Everything should be stored as bytes already (if this is feasible)
+
sub new {
my $self = bless [], shift;
my $i = 0;
diff --git a/lib/KorAP/XML/Field/MultiTermToken.pm b/lib/KorAP/XML/Field/MultiTermToken.pm
index 7331870..c12d35a 100644
--- a/lib/KorAP/XML/Field/MultiTermToken.pm
+++ b/lib/KorAP/XML/Field/MultiTermToken.pm
@@ -145,8 +145,22 @@
elsif (
(index($a->[5],'<:') == 0 || index($a->[5],'>:') == 0) &&
(index($b->[5], '<:') == 0 || index($b->[5],'>:') == 0)) {
- my $a_end = $a->[2] // 0;
- my $b_end = $b->[2] // 0;
+
+ my $a_end = ($a->pti < 34 ? $a->p_start : (
+ ($a->pti == 35 ? ($a->[0] =~ /^(?:<i>\d+){4}<i>(\d+)</) :
+ ($a->[0] =~ /^(?:<i>\d+){2}<i>(\d+)</)
+ )
+ ));
+
+ my $b_end = ($b->pti < 34 ? $b->p_start : (
+ ($b->pti == 35 ? ($b->[0] =~ /^(?:<i>\d+){4}<i>(\d+)</) :
+ ($b->[0] =~ /^(?:<i>\d+){2}<i>(\d+)</)
+ )
+ ));
+
+
+# my $a_end = $a->[2] // 0;
+# my $b_end = $b->[2] // 0;
# left is p_end
if ($a_end < $b_end) {
diff --git a/t/sort_tokens.t b/t/sort_tokens.t
index 24990ac..a6eed45 100644
--- a/t/sort_tokens.t
+++ b/t/sort_tokens.t
@@ -104,7 +104,7 @@
'<i>4<i>2<i>7<s>1<s>3<s>4'
), 'New rel');
-# 2-4 t0 4
+# 2-4 to 4
ok($mtt->add(term => '<:child-of',
pti => 34,
payload => '<i>0<i>0' . # character os
@@ -130,8 +130,39 @@
is($mtt->to_string,
'[(0-5)'.
+ # 2 -> 2-4
+ '>:child-of$<b>33<i>0<i>0' . '<i>2<i>4<s>2<s>1<s>3|'.
+ # 2 -> 3
+ '>:child-of$<b>32' . '<i>3<s>2<s>4<s>2|'.
+ # 2-4 -> 2-7
+ '>:child-of$<b>35<i>0<i>0<i>0<i>0' . '<i>4<i>2<i>7<s>1<s>3<s>4|'.
+ # 2-4 -> 3
+ '<:child-of$<b>34<i>4<i>0<i>0' . '<i>4<i>3<s>3<s>3<s>1|' .
+ # 2-4 -> 4
+ '<:child-of$<b>34<i>0<i>0' . '<i>4<i>4<s>4<s>3<s>1|'.
+# 2-7 -> 1-7
+ '>:child-of$<b>35<i>0<i>0<i>0<i>0' . '<i>7<i>1<i>7<s>2<s>4<s>2|'.
+# 2-7 -> 2-4
+ '<:child-of$<b>35<i>0<i>0<i>0<i>0' . '<i>7<i>2<i>4<s>5<s>4<s>3|'.
+# 2-7 -> 4-7
+ '<:child-of$<b>35<i>0<i>0<i>0<i>0' . '<i>7<i>4<i>7<s>6<s>4<s>2]' ,
+ 'Check sorted relations'
+ );
+
+done_testing;
+__END__
+
+NOTE: Sorting of the candidate spans can alternatively be done in
+ * indexing, instead of here. (first by left positions and then by
+ * right positions)
+
+
+
+is($mtt->to_string,
+ '[(0-5)'.
'>:child-of$<i>2<i>4<s>2<s>1<s>3|'.
'>:child-of$<i>3<s>2<s>4<s>2|'.
+
'>:child-of$<i>4<i>2<i>7<s>1<s>3<s>4|'.
'<:child-of$<i>4<b>0<i>3<s>3<s>3<s>1|'.
'<:child-of$<i>4<b>0<i>4<s>4<s>3<s>1|'.
@@ -141,24 +172,6 @@
'Check sorted relations'
);
-done_testing;
-__END__
-
-is($mtt->to_string,
- '[(0-5)'.
- # 2-7 -> 1-7
- '>:child-of$<b>35<i>0<i>0<i>0<i>0<i>7<i>1<i>7<s>2<s>4<s>2|'.
- '>:child-of$<b>33<i>0<i>0<i>2<i>4<s>2<s>1<s>3|'.
- '<:child-of$<b>35<i>0<i>0<i>0<i>0<i>7<i>2<i>4<s>5<s>4<s>3|'.
- '>:child-of$<b>35<i>0<i>0<i>0<i>0<i>4<i>2<i>7<s>1<s>3<s>4|'.
- '>:child-of$<b>32<i>3<s>2<s>4<s>2|'.
- '<:child-of$<b>34<i>0<i>0<i>4<i>4<s>4<s>3<s>1|'.
- '<:child-of$<b>35<i>0<i>0<i>0<i>0<i>7<i>4<i>7<s>6<s>4<s>2|'.
- '<:child-of$<b>34<i>4<i>0<i>0<i>4<i>3<s>3<s>3<s>1]',
- 'Check sorted relations'
- );
-
-
# 2 -> 2-4
# >:child-of$<i>2<i>4<s>2<s>1<s>3
# 2 -> 3