TUIs are now optional and omitted from relation payloads when not set
Change-Id: I01b552bb37012c8a0b69a527667173c22ff5e1fb
diff --git a/lib/KorAP/XML/Field/MultiTermToken.pm b/lib/KorAP/XML/Field/MultiTermToken.pm
index 98fcd8a..7331870 100644
--- a/lib/KorAP/XML/Field/MultiTermToken.pm
+++ b/lib/KorAP/XML/Field/MultiTermToken.pm
@@ -98,22 +98,22 @@
# term to span - right token
# (including character offsets)
- elsif ($_[0] == 33 && $_[1] =~ m/^(?:<i>\d+){2}<i>(\d+)<i>(\d+)<s>/o) {
+ elsif ($_[0] == 33 && $_[1] =~ m/^(?:<i>\d+){2}<i>(\d+)<i>(\d+)(?:<s>|$)/o) {
return ($1, $2);
}
# span to term
- elsif ($_[0] == 34 && $_[1] =~ m/^(?:<i>\d+){3}<i>(\d+)<s>/o) {
+ elsif ($_[0] == 34 && $_[1] =~ m/^(?:<i>\d+){3}<i>(\d+)(?:<s>|$)/o) {
return ($1, $1);
}
# span-to-span
- elsif ($_[0] == 35 && $_[1] =~ m/^(?:<i>\d+){4}<i>(\d+)<i>(\d+)<s>/o) {
+ elsif ($_[0] == 35 && $_[1] =~ m/^(?:<i>\d+){5}<i>(\d+)<i>(\d+)(?:<s>|$)/o) {
return ($1, $2);
};
# span to term - right token
- carp 'Unknown relation format! ' . $_[1];
+ carp 'Unknown relation format! ' .$_[0] . ':' . $_[1];
return (0,0);
};
@@ -158,8 +158,6 @@
else {
# Both are either > or <
- warn $a->to_string;
-
# Check for right positions
(my $a_start, $a_end) = _rel_right_pos($a->pti, $a->[0]);
(my $b_start, $b_end) = _rel_right_pos($b->pti, $b->[0]);
diff --git a/lib/KorAP/XML/Index/Mate/Dependency.pm b/lib/KorAP/XML/Index/Mate/Dependency.pm
index 86b3463..04e1e9f 100644
--- a/lib/KorAP/XML/Index/Mate/Dependency.pm
+++ b/lib/KorAP/XML/Index/Mate/Dependency.pm
@@ -51,9 +51,9 @@
my %rel = (
pti => 32, # term-to-term relation
payload =>
- '<i>' . $pos . # right part token position
- '<s>0' . # $target->tui . # left part tui
- '<s>0' # . $target->tui # right part tui
+ '<i>' . $pos # . # right part token position
+# '<s>0' . # $target->tui . # left part tui
+# '<s>0' # . $target->tui # right part tui
);
# Add relations
@@ -95,9 +95,9 @@
term => '>:mate/d:' . $label,
pti => 32, # term-to-term relation
payload =>
- '<i>' . $target->pos . # right part token position
- '<s>0' . # $source_term->tui . # left part tui
- '<s>0' # . $target_term->tui # right part tui
+ '<i>' . $target->pos # . # right part token position
+# '<s>0' . # $source_term->tui . # left part tui
+# '<s>0' # . $target_term->tui # right part tui
);
my $target_mtt = $stream->pos($target->pos);
@@ -105,9 +105,9 @@
term => '<:mate/d:' . $label,
pti => 32, # term-to-term relation
payload =>
- '<i>' . $source->pos . # left part token position
- '<s>0' . # $source_term->tui . # left part tui
- '<s>0' # . $target_term->tui # right part tui
+ '<i>' . $source->pos # . # left part token position
+# '<s>0' . # $source_term->tui . # left part tui
+# '<s>0' # . $target_term->tui # right part tui
);
}
@@ -126,9 +126,9 @@
'<i>' . $target->o_start . # end position
'<i>' . $target->o_end . # end position
'<i>' . $target->p_start . # right part start position
- '<i>' . $target->p_end . # right part end position
- '<s>0' . # $source_term->tui . # left part tui
- '<s>0' # . $target_span->tui # right part tui
+ '<i>' . $target->p_end # . # right part end position
+# '<s>0' . # $source_term->tui . # left part tui
+# '<s>0' # . $target_span->tui # right part tui
);
my $target_mtt = $stream->pos($target->p_start);
@@ -139,9 +139,9 @@
'<i>' . $target->o_start . # end position
'<i>' . $target->o_end . # end position
'<i>' . $target->p_end . # right part end position
- '<i>' . $source->pos . # left part token position
- '<s>0' . # $source_term->tui . # left part tui
- '<s>0' # . $target_span->tui # right part tui
+ '<i>' . $source->pos # . # left part token position
+# '<s>0' . # $source_term->tui . # left part tui
+# '<s>0' # . $target_span->tui # right part tui
);
}
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 16d80d5..f534185 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -14,6 +14,7 @@
use KorAP::XML::Archive;
use KorAP::XML::Tokenizer;
use Parallel::ForkManager;
+# TODO: use Parallel::Loops
# CHANGES:
# ----------------------------------------------------------
diff --git a/t/index/mate_dependency.t b/t/index/mate_dependency.t
index e322bd2..2567a2a 100644
--- a/t/index/mate_dependency.t
+++ b/t/index/mate_dependency.t
@@ -14,14 +14,14 @@
my $data = $tokens->to_data->{data}->{stream};
-is($data->[4]->[1], '<:mate/d:--$<b>32<i>4<s>0<s>0', '< rel 1 (unary)');
-is($data->[4]->[2], '>:mate/d:--$<b>32<i>4<s>0<s>0', '> rel 1 (unary)');
+is($data->[4]->[1], '<:mate/d:--$<b>32<i>4', '< rel 1 (unary)');
+is($data->[4]->[2], '>:mate/d:--$<b>32<i>4', '> rel 1 (unary)');
#is($data->[4]->[8], 'mate/d:&&&$<b>128<s>1', 'token for rel 1 (unary)');
-is($data->[1]->[0], '>:mate/d:NK$<b>32<i>3<s>0<s>0', '> rel 2 (term-to-term)');
+is($data->[1]->[0], '>:mate/d:NK$<b>32<i>3', '> rel 2 (term-to-term)');
#is($data->[1]->[3], 'mate/d:&&&$<b>128<s>1', '< rel 2 (term-to-term)');
-is($data->[3]->[1], '<:mate/d:NK$<b>32<i>1<s>0<s>0', '< rel 2 (term-to-term)');
+is($data->[3]->[1], '<:mate/d:NK$<b>32<i>1', '< rel 2 (term-to-term)');
#is($data->[3]->[5], 'mate/d:&&&$<b>128<s>1', '< rel 2 (term-to-term)');
diff --git a/t/real/wpd.t b/t/real/wpd.t
index efbc89b..45ffd01 100644
--- a/t/real/wpd.t
+++ b/t/real/wpd.t
@@ -72,9 +72,12 @@
my $stream = $tokens->to_data->{data}->{stream};
-is($stream->[77]->[0], '<:mate/d:--$<b>34<i>78<s>0<s>0', 'element to term');
+# This is not a good relation example
+is($stream->[77]->[0],
+ '<:mate/d:--$<b>34<i>498<i>499<i>78<i>78',
+ 'element to term');
#is($stream->[77]->[1], '<>:mate/d:&&&$<b>64<i>498<i>499<i>78<b>0<s>1', 'element to term');
-is($stream->[78]->[0], '>:mate/d:--$<b>33<i>498<i>499<i>77<i>78<s>0<s>0', 'term to element');
+is($stream->[78]->[0], '>:mate/d:--$<b>33<i>498<i>499<i>77<i>78', 'term to element');
# is($stream->[78]->[3], 'mate/d:&&&$<b>128<s>1', 'Node');
diff --git a/t/sort_tokens.t b/t/sort_tokens.t
index d777cfd..24990ac 100644
--- a/t/sort_tokens.t
+++ b/t/sort_tokens.t
@@ -78,7 +78,7 @@
ok($mtt->add(term => '<:child-of',
pti => 35,
payload => '<i>0<i>0<i>0<i>0'. # character os
- '<i>7<i>2<i>4<s>0<s>0'
+ '<i>7<i>2<i>4<s>5<s>4<s>3'
), 'New rel');
# 2-4 to 3
@@ -86,7 +86,7 @@
p_end => 4,
pti => 34,
payload => '<i>0<i>0' . # character os
- '<i>4<i>3<s>0<s>0<s>0'
+ '<i>4<i>3<s>3<s>3<s>1'
), 'New rel');
# 2 to 2-4
@@ -94,7 +94,7 @@
ok($mtt->add(term => '>:child-of',
pti => 33,
payload => '<i>0<i>0'. # character os
- '<i>2<i>4<s>0<s>0<s>0'
+ '<i>2<i>4<s>2<s>1<s>3'
), 'New rel');
# 2-4 to 2-7
@@ -104,23 +104,61 @@
'<i>4<i>2<i>7<s>1<s>3<s>4'
), 'New rel');
+# 2-4 to 4
+ok($mtt->add(term => '<:child-of',
+ pti => 34,
+ payload => '<i>0<i>0' . # character os
+ '<i>4<i>4<s>4<s>3<s>1'), 'New rel');
+
+# 2-7 to 1-7
+ok($mtt->add(term => '>:child-of',
+ pti => 35,
+ payload => '<i>0<i>0<i>0<i>0' . # character os
+ '<i>7<i>1<i>7<s>2<s>4<s>2'), 'New rel');
+
+# 2-7 to 4-7
+ok($mtt->add(term => '<:child-of',
+ pti => 35,
+ payload => '<i>0<i>0<i>0<i>0' . # character os
+ '<i>7<i>4<i>7<s>6<s>4<s>2'), 'New rel');
+
+# 2 to 3
+ok($mtt->add(term => '>:child-of',
+ pti => 32,
+ payload => '<i>3<s>2<s>4<s>2'
+ ), 'New rel');
+
+is($mtt->to_string,
+ '[(0-5)'.
+ '>:child-of$<i>2<i>4<s>2<s>1<s>3|'.
+ '>:child-of$<i>3<s>2<s>4<s>2|'.
+ '>:child-of$<i>4<i>2<i>7<s>1<s>3<s>4|'.
+ '<:child-of$<i>4<b>0<i>3<s>3<s>3<s>1|'.
+ '<:child-of$<i>4<b>0<i>4<s>4<s>3<s>1|'.
+ '>:child-of$<i>7<i>1<i>7<s>2<s>4<s>2|'.
+ '<:child-of$<i>7<i>2<i>4<s>5<s>4<s>3|'.
+ '<:child-of$<i>7<i>4<i>7<s>6<s>4<s>2]',
+ 'Check sorted relations'
+ );
+
done_testing;
__END__
+is($mtt->to_string,
+ '[(0-5)'.
+ # 2-7 -> 1-7
+ '>:child-of$<b>35<i>0<i>0<i>0<i>0<i>7<i>1<i>7<s>2<s>4<s>2|'.
+ '>:child-of$<b>33<i>0<i>0<i>2<i>4<s>2<s>1<s>3|'.
+ '<:child-of$<b>35<i>0<i>0<i>0<i>0<i>7<i>2<i>4<s>5<s>4<s>3|'.
+ '>:child-of$<b>35<i>0<i>0<i>0<i>0<i>4<i>2<i>7<s>1<s>3<s>4|'.
+ '>:child-of$<b>32<i>3<s>2<s>4<s>2|'.
+ '<:child-of$<b>34<i>0<i>0<i>4<i>4<s>4<s>3<s>1|'.
+ '<:child-of$<b>35<i>0<i>0<i>0<i>0<i>7<i>4<i>7<s>6<s>4<s>2|'.
+ '<:child-of$<b>34<i>4<i>0<i>0<i>4<i>3<s>3<s>3<s>1]',
+ 'Check sorted relations'
+ );
-# 2-4 t0 4
-ok($mtt->add(term => '<:child-of', p_end => 4, payload => '<b>0<i>4<s>4<s>3<s>1'), 'New rel');
-# 2-7 to 1-7
-ok($mtt->add(term => '>:child-of', p_end => 7, payload => '<i>1<i>7<s>2<s>4<s>2'), 'New rel');
-
-# 2-7 to 4-7
-ok($mtt->add(term => '<:child-of', p_end => 7, payload => '<i>4<i>7<s>6<s>4<s>2'), 'New rel');
-
-# 2 to 3
-ok($mtt->add(term => '>:child-of', payload => '<i>3<s>2<s>4<s>2'), 'New rel');
-
-is($mtt->to_string, '[(0-5)>:child-of$<i>2<i>4<s>2<s>1<s>3|>:child-of$<i>3<s>2<s>4<s>2|>:child-of$<i>4<i>2<i>7<s>1<s>3<s>4|<:child-of$<i>4<b>0<i>3<s>3<s>3<s>1|<:child-of$<i>4<b>0<i>4<s>4<s>3<s>1|>:child-of$<i>7<i>1<i>7<s>2<s>4<s>2|<:child-of$<i>7<i>2<i>4<s>5<s>4<s>3|<:child-of$<i>7<i>4<i>7<s>6<s>4<s>2]', 'Check sorted relations');
# 2 -> 2-4
# >:child-of$<i>2<i>4<s>2<s>1<s>3
# 2 -> 3
@@ -142,4 +180,3 @@
__END__
-
diff --git a/t/transform.t b/t/transform.t
index 6b83124..026b5d3 100644
--- a/t/transform.t
+++ b/t/transform.t
@@ -167,7 +167,7 @@
'tt/p:FM$<b>129<b>54|'.
'<>:tt/s:s$<b>64<i>0<i>6083<i>923<b>0|'.
'-:tt/sentences$<i>1|'.
- '>:mate/d:PNC$<b>32<i>2<s>0<s>0|'.
+ '>:mate/d:PNC$<b>32<i>2|' . #<s>0<s>0|'.
# 'mate/d:&&&$<b>128<s>1|'.
'xip/p:SYMBOL|'.
'xip/l:A|'.