Binary relations with element support
Change-Id: I2db566bf8f491119f2d431cef504c7a9a29f85ed
diff --git a/lib/KorAP/XML/Field/MultiTermTokenStream.pm b/lib/KorAP/XML/Field/MultiTermTokenStream.pm
index 0d8ae05..3b828d5 100644
--- a/lib/KorAP/XML/Field/MultiTermTokenStream.pm
+++ b/lib/KorAP/XML/Field/MultiTermTokenStream.pm
@@ -14,22 +14,48 @@
};
sub get_node {
- my ($self, $pos, $term) = @_;
- my $mtt = $self->pos($pos);
- my $node = $mtt->grep_mt($term);
+ my ($self, $unit, $term) = @_; # $unit is a token or span object, $term the node term to find or create
- # TODO: Check if term has PTI 128 - or what is wanted!
+ if ($unit->type eq 'token') { # Token unit: the node lives at the token's own position
+ my $mtt = $self->pos($unit->pos);
+ my $node = $mtt->grep_mt($term);
- # TODO: if the node has no TUI - add!
- return $node if $node;
+ # TODO: Check if term has PTI 128 - or what is wanted!
- my $tui = $self->tui($pos);
- return $mtt->add(
- term => $term,
- pti => 128,
- payload => '<s>' . $tui,
- tui => $tui
- );
+ # TODO: if the node has no TUI - add!
+ return $node if $node; # Reuse the node if the term is already present
+
+ my $tui = $self->tui($unit->pos);
+ return $mtt->add( # No node yet: add one and return the result of add()
+ term => $term,
+ pti => 128,
+ payload => '<s>' . $tui,
+ tui => $tui
+ );
+ }
+
+ # Any other unit is treated as a span: the node is anchored at the span's start position
+ else {
+ my $mtt = $self->pos($unit->p_start);
+ my $node = $mtt->grep_mt('<>:' . $term); # Span nodes carry a '<>:' prefix on the term
+
+ # TODO: if the node has no TUI - add!
+ return $node if $node; # Reuse the node if the prefixed term is already present
+
+ my $tui = $self->tui($unit->p_start);
+
+ return $mtt->add( # No node yet: add one carrying the span's offsets and positions
+ term => '<>:' . $term,
+ o_start => $unit->o_start,
+ o_end => $unit->o_end,
+ p_start => $unit->p_start,
+ p_end => $unit->p_end,
+ pti => 64,
+ payload => '<b>0<s>' . $tui,
+ tui => $tui
+ );
+
+ };
};
sub add_meta {
diff --git a/lib/KorAP/XML/Index/Mate/Dependency.pm b/lib/KorAP/XML/Index/Mate/Dependency.pm
index 92885c5..ff28d7f 100644
--- a/lib/KorAP/XML/Index/Mate/Dependency.pm
+++ b/lib/KorAP/XML/Index/Mate/Dependency.pm
@@ -10,6 +10,7 @@
# >>:xip/d:SUBJ<i>566<i>789
# Relation data
+ # Supports term-to-term and term-to-element only
$$self->add_tokendata(
foundry => 'mate',
layer => 'dependency',
@@ -37,15 +38,14 @@
# I have no clue, what -- should mean
# next if $_->{-label} eq '--';
- # Target is at the same position!
- my $pos = $source->pos;
-
-
# Get target node - not very elegant
my $target = $stream->get_node(
- $pos, 'mate/d:' . $NODE_LABEL
+ $source, 'mate/d:' . $NODE_LABEL
);
+ # Target is at the same position!
+ my $pos = $source->pos;
+
my %rel = (
pti => 32, # term-to-term relation
payload =>
@@ -72,24 +72,23 @@
my $from = $_->{span}->{-from};
my $to = $_->{span}->{-to};
+ # Get source node
+ my $source_term = $stream->get_node(
+ $source, 'mate/d:' . $NODE_LABEL
+ );
+
# Target
my $target = $tokens->token($from, $to);
+ # Relation is term-to-term with a found target!
if ($target) {
- # Relation is term-to-term with a found target!
-
- # Get source node
- my $source_term = $stream->get_node(
- $source->pos, 'mate/d:' . $NODE_LABEL
- );
# Get target node
my $target_term = $stream->get_node(
- $target->pos, 'mate/d:' . $NODE_LABEL
+ $target, 'mate/d:' . $NODE_LABEL
);
- $mtt->add(
- term => '>:mate/d:' . $label,
+ my %rel = (
pti => 32, # term-to-term relation
payload =>
'<i>' . $target->pos . # right part token position
@@ -97,36 +96,44 @@
'<s>' . $target_term->tui # right part tui
);
+ $mtt->add(
+ term => '>:mate/d:' . $label,
+ %rel
+ );
+
my $target_mtt = $stream->pos($target->pos);
$target_mtt->add(
term => '<:mate/d:' . $label,
- pti => 32, # term-to-term relation
- payload =>
- '<i>' . $target->pos . # right part token position (TODO: THIS IS PROBABLY WRONG!)
- '<s>' . $source_term->tui . # left part tui
- '<s>' . $target_term->tui # right part tui
-
+ %rel
);
}
- else {
- # TODO: SPANS not yet supported
- next;
+ # Relation is possibly term-to-element with a found target!
+ elsif ($target = $tokens->span($from, $to)) {
+
+ # Get target node
+ my $target_span = $stream->get_node(
+ $target, 'mate/d:' . $NODE_LABEL
+ );
+
+ my %rel = (
+ pti => 33, # term-to-element relation
+ payload =>
+ '<i>' . $target->p_start . # right part token position
+ '<s>' . $source_term->tui . # left part tui
+ '<s>' . $target_span->tui # right part tui
+ );
+ # Outgoing relation is stored at the source token
+ $mtt->add(
+ term => '>:mate/d:' . $label,
+ %rel
+ );
+ # Incoming relation is stored where the target span starts (as the term-to-term branch stores it at the target)
+ $stream->pos($target->p_start)->add(
+ term => '<:mate/d:' . $label,
+ %rel
+ );
+ };
-
-
- # Temporary
- next;
-
- $mtt->add(
- term => '>:mate/d:' . $label,
- payload => '<i>' . $target->pos
- );
-
- $stream->pos($target->pos)->add(
- term => '<:mate/d:' . $label,
- payload => '<i>' . $source->pos
- );
};
};
}) or return;
diff --git a/lib/KorAP/XML/Tokenizer/Span.pm b/lib/KorAP/XML/Tokenizer/Span.pm
index 9e34aec..b359e14 100644
--- a/lib/KorAP/XML/Tokenizer/Span.pm
+++ b/lib/KorAP/XML/Tokenizer/Span.pm
@@ -7,6 +7,10 @@
bless [], shift;
};
+sub type { # Constant type tag of a span unit
+ return 'span';
+};
+
sub o_start {
if (defined $_[1]) {
$_[0]->[0] = $_[1];
diff --git a/lib/KorAP/XML/Tokenizer/Token.pm b/lib/KorAP/XML/Tokenizer/Token.pm
index 36334db..815f9e8 100644
--- a/lib/KorAP/XML/Tokenizer/Token.pm
+++ b/lib/KorAP/XML/Tokenizer/Token.pm
@@ -7,6 +7,10 @@
bless [], shift;
};
+sub type { # Constant type tag of a token unit
+ return 'token';
+};
+
# get or set token position
sub pos {
if (defined $_[1]) {
diff --git a/t/index/mate_dependency.t b/t/index/mate_dependency.t
index 9ec7f92..fab2bb5 100644
--- a/t/index/mate_dependency.t
+++ b/t/index/mate_dependency.t
@@ -15,11 +15,14 @@
my $data = $tokens->to_data->{data}->{stream};
# Unary relation
-is($data->[4]->[3], '<:mate/d:--$<b>32<i>4<s>1<s>1', '< rel 1');
-is($data->[4]->[4], '>:mate/d:--$<b>32<i>4<s>1<s>1', '> rel 1');
-is($data->[4]->[7], 'mate/d:NODE$<b>128<s>1', 'token for rel 1');
+is($data->[4]->[3], '<:mate/d:--$<b>32<i>4<s>1<s>1', '< rel 1 (unary)');
+is($data->[4]->[4], '>:mate/d:--$<b>32<i>4<s>1<s>1', '> rel 1 (unary)');
+is($data->[4]->[7], 'mate/d:NODE$<b>128<s>1', 'token for rel 1 (unary)');
-diag 'Test binary relations';
+is($data->[1]->[0], '>:mate/d:NK$<b>32<i>3<s>1<s>1', '> rel 2 (term-to-term)');
+is($data->[1]->[3], 'mate/d:NODE$<b>128<s>1', 'source token for rel 2 (term-to-term)');
+is($data->[3]->[1], '<:mate/d:NK$<b>32<i>3<s>1<s>1', '< rel 2 (term-to-term)');
+is($data->[3]->[4], 'mate/d:NODE$<b>128<s>1', 'target token for rel 2 (term-to-term)');
done_testing;
diff --git a/t/transform.t b/t/transform.t
index 793e9bf..fd751f7 100644
--- a/t/transform.t
+++ b/t/transform.t
@@ -49,8 +49,7 @@
# Mate
# push(@layers, ['Mate', 'Morpho']);
-# push(@layers, ['Mate', 'Dependency']);
-diag 'Check for mate/d';
+push(@layers, ['Mate', 'Dependency']);
# XIP
push(@layers, ['XIP', 'Morpho']);
@@ -156,7 +155,8 @@
'tt/p:FM$<b>129<b>54|'.
'<>:tt/s:s$<b>64<i>0<i>6083<i>923<b>0|'.
'-:tt/sentences$<i>1|'.
-# '>:mate/d:PNC$<i>2|'.
+ '>:mate/d:PNC$<b>32<i>2<s>1<s>1|'.
+ 'mate/d:NODE$<b>128<s>1|'.
'xip/p:SYMBOL|'.
'xip/l:A|'.
'<>:xip/c:TOP$<b>64<i>0<i>74<i>13<b>0|'.
@@ -178,10 +178,11 @@
'Startinfo');
};
-is($tokens->layer_info,
- 'base/s=spans cnx/c=spans cnx/l=tokens cnx/m=tokens cnx/p=tokens cnx/s=spans cnx/syn=tokens corenlp/ne=tokens corenlp/s=spans mate/l=tokens mate/m=tokens mate/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans xip/c=spans xip/d=rels xip/l=tokens xip/p=tokens xip/s=spans', 'Layer info'); # mate/d=rels
-is($tokens->support, 'base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/namedentities corenlp/namedentities corenlp/namedentities/ne_dewac_175m_600 corenlp/namedentities/ne_hgc_175m_600 corenlp/sentences mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/dependency xip/morpho xip/sentences', 'Support'); # mate/dependency
+is($tokens->layer_info,
+ 'base/s=spans cnx/c=spans cnx/l=tokens cnx/m=tokens cnx/p=tokens cnx/s=spans cnx/syn=tokens corenlp/ne=tokens corenlp/s=spans mate/d=rels mate/l=tokens mate/m=tokens mate/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans xip/c=spans xip/d=rels xip/l=tokens xip/p=tokens xip/s=spans', 'Layer info');
+
+is($tokens->support, 'base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/namedentities corenlp/namedentities corenlp/namedentities/ne_dewac_175m_600 corenlp/namedentities/ne_hgc_175m_600 corenlp/sentences mate mate/dependency mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/dependency xip/morpho xip/sentences', 'Support');
done_testing;
__END__