Remove certainty value on lemmata in Treetagger

Change-Id: Idaab5ac03bff819b2203fb0799dd6586cdacbc42

commit: 0426176c7359a75e76f29eedac19650dcbbd0922 [log] [tgz]
author: Akron <nils@diewald-online.de> Mon Jan 29 20:16:39 2018 +0100
committer: Akron <nils@diewald-online.de> Mon Jan 29 20:16:39 2018 +0100
tree: 347da39723fb071f577dfc6de81fe69e50a4bf84
parent: 6727b21102e38f811475d8c1f6079ad2979473dd [diff]
diff --git a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
index a337261..15a3947 100644
--- a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm

@@ -19,48 +19,51 @@
       $content = ref $content ne 'ARRAY' ? [$content] : $content;
 
       foreach my $fs (@$content) {
-	$content = $fs->{fs}->{f};
+        $content = $fs->{fs}->{f};
 
-	my @val;
-	my $certainty = 0;
-	foreach (@$content) {
-	  if ($_->{-name} eq 'certainty') {
-	    $certainty = floor(($_->{'#text'} * 255));
-	    $certainty = $certainty if $certainty;
-	  }
-	  else {
-	    push @val, $_
-	  };
-	};
+        my @val;
+        my $certainty = 0;
+        foreach (@$content) {
+          if ($_->{-name} eq 'certainty') {
+            $certainty = floor(($_->{'#text'} * 255));
+            $certainty = $certainty if $certainty;
+          }
+          else {
+            push @val, $_
+          };
+        };
 
-	foreach (@val) {
-	  # lemma
-	  if (($_->{-name} eq 'lemma') &&
-		($found = $_->{'#text'}) &&
-		  ($found ne 'UNKNOWN') &&
-		    ($found ne '?')) {
-	    my %term = (
-	      term => 'tt/l:' . $found
-	    );
-	    if ($certainty) {
-	      $term{pti} = 129;
-	      $term{payload} = '<b>' . $certainty;
-	    };
-	    $mtt->add(%term);
-	  };
+        # Iterate over values
+        foreach (@val) {
+          # lemma
+          if (($_->{-name} eq 'lemma') &&
+                ($found = $_->{'#text'}) &&
+                ($found ne 'UNKNOWN') &&
+                ($found ne '?')) {
+            my %term = (
+              term => 'tt/l:' . $found
+            );
 
-	  # pos
-	  if (($_->{-name} eq 'ctag') && ($found = $_->{'#text'})) {
-	    my %term = (
-	      term => 'tt/p:' . $found
-	    );
-	    if ($certainty) {
-	      $term{pti} = 129;
-	      $term{payload} = '<b>' . $certainty;
-	    };
-	    $mtt->add(%term);
-	  };
-	};
+            # Ignore certainty for lemma
+            # if ($certainty) {
+            #   $term{pti} = 129;
+            #   $term{payload} = '<b>' . $certainty;
+            # };
+            $mtt->add(%term);
+          };
+
+          # pos
+          if (($_->{-name} eq 'ctag') && ($found = $_->{'#text'})) {
+            my %term = (
+              term => 'tt/p:' . $found
+            );
+            if ($certainty) {
+              $term{pti} = 129;
+              $term{payload} = '<b>' . $certainty;
+            };
+            $mtt->add(%term);
+          };
+        };
       };
     }) or return;
 

diff --git a/t/annotation/corpus/doc/0001/tree_tagger/morpho.xml b/t/annotation/corpus/doc/0001/tree_tagger/morpho.xml
index e229cec..7026e47 100644
--- a/t/annotation/corpus/doc/0001/tree_tagger/morpho.xml
+++ b/t/annotation/corpus/doc/0001/tree_tagger/morpho.xml

@@ -144,6 +144,15 @@
             <f name="ctag">KOUS</f>
           </fs>
         </f>
+
+        <!-- This is just to test different certainties  -->
+        <f name="lex">
+          <fs>
+            <f name="lemma">bevor</f>
+            <f name="certainty">0.225048</f>
+            <f name="ctag">PTKVZ</f>
+          </fs>
+        </f>
       </fs>
     </span>
     <span from="85" id="s_20" to="88">

diff --git a/t/annotation/tt_morpho.t b/t/annotation/tt_morpho.t
index 3ce5cc3..726edce 100644
--- a/t/annotation/tt_morpho.t
+++ b/t/annotation/tt_morpho.t

@@ -18,18 +18,22 @@
 like($data->{layerInfos}, qr!tt/p=tokens!, 'data');
 like($data->{layerInfos}, qr!tt/l=tokens!, 'data');
 
-is($data->{stream}->[0]->[5], 'tt/l:zum$<b>129<b>255', 'POS');
+is($data->{stream}->[0]->[5], 'tt/l:zum', 'POS');
 is($data->{stream}->[0]->[6], 'tt/p:APPRART$<b>129<b>255', 'POS');
 
-is($data->{stream}->[3]->[3], 'tt/l:Anlaß$<b>129<b>255', 'POS');
+is($data->{stream}->[3]->[3], 'tt/l:Anlaß', 'POS');
 is($data->{stream}->[3]->[4], 'tt/p:NN$<b>129<b>255', 'POS');
 
-is($data->{stream}->[10]->[3], 'tt/l:ein$<b>129<b>253', 'POS');
+is($data->{stream}->[10]->[3], 'tt/l:ein', 'POS');
 is($data->{stream}->[10]->[4], 'tt/p:PTKVZ$<b>129<b>253', 'POS');
 
-is($data->{stream}->[-1]->[3], 'tt/l:werden$<b>129<b>255', 'POS');
+is($data->{stream}->[-1]->[3], 'tt/l:werden', 'POS');
 is($data->{stream}->[-1]->[4], 'tt/p:VAFIN$<b>129<b>255', 'POS');
 
+is($data->{stream}->[11]->[3], 'tt/l:bevor', 'Lemma');
+is($data->{stream}->[11]->[4], 'tt/p:KOUS$<b>129<b>254', 'Lemma');
+
+
 done_testing;
 
 __END__

diff --git a/t/real/goethe.t b/t/real/goethe.t
index fc13cca..d63849b 100644
--- a/t/real/goethe.t
+++ b/t/real/goethe.t

@@ -178,9 +178,9 @@
 
 is($output->{data}->{layerInfos}, 'base/s=spans opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans', 'layerInfos');
 $first_token = join('||', @{$output->{data}->{stream}->[0]});
-like($first_token, qr!tt/l:autobiographisch\$<b>129<b>165!, 'data');
+like($first_token, qr!tt/l:autobiographisch!, 'data');
 like($first_token, qr!tt/p:ADJA\$<b>129<b>165!, 'data');
-like($first_token, qr!tt/l:Autobiographische\$<b>129<b>89!, 'data');
+like($first_token, qr!tt/l:Autobiographische!, 'data');
 like($first_token, qr!tt/p:NN\$<b>129<b>89!, 'data');
 
 ## CoreNLP

diff --git a/t/real/rei.t b/t/real/rei.t
index 5e99df4..2a3511e 100644
--- a/t/real/rei.t
+++ b/t/real/rei.t

@@ -220,7 +220,7 @@
 is('i:rede', $first->[26]);
 is('opennlp/p:NN', $first->[27]);
 is('s:Rede', $first->[28]);
-is('tt/l:Rede$<b>129<b>253', $first->[29]);
+is('tt/l:Rede', $first->[29]);
 is('tt/p:NN$<b>129<b>253', $first->[30]);
 
 my $last = $output->{data}->{stream}->[-1];
@@ -233,7 +233,7 @@
 is("i:schr\x{f6}der", $last->[5]);
 is('opennlp/p:NE', $last->[6]);
 is("s:Schr\x{f6}der", $last->[7]);
-is("tt/l:Schr\x{f6}der\$<b>129<b>255", $last->[8]);
+is("tt/l:Schr\x{f6}der", $last->[8]);
 is('tt/p:NE$<b>129<b>255', $last->[9]);
 
 
@@ -308,7 +308,7 @@
 is('i:christine', $first->[28]);
 is('opennlp/p:NE', $first->[29]);
 is('s:Christine', $first->[30]);
-is('tt/l:Christine$<b>129<b>255', $first->[31]);
+is('tt/l:Christine', $first->[31]);
 is('tt/p:NE$<b>129<b>255', $first->[32]);
 
 $last = $output->{data}->{stream}->[-1];

diff --git a/t/transform.t b/t/transform.t
index 798e6c2..57d34c0 100644
--- a/t/transform.t
+++ b/t/transform.t

@@ -161,9 +161,9 @@
   '<>:cnx/c:np$<b>64<i>0<i>1<i>1<b>0|'.
   '<>:cnx/s:s$<b>64<i>0<i>74<i>13<b>0|'.
   '-:cnx/sentences$<i>63|'.
-  'tt/l:A$<b>129<b>199|'.
+  'tt/l:A|'.
   'tt/p:NN$<b>129<b>199|'.
-  'tt/l:A$<b>129<b>54|'.
+  'tt/l:A|'.
   'tt/p:FM$<b>129<b>54|'.
   '<>:tt/s:s$<b>64<i>0<i>6083<i>923<b>0|'.
   '-:tt/sentences$<i>1|'.
commit	0426176c7359a75e76f29eedac19650dcbbd0922	[log] [tgz]
author	Akron <nils@diewald-online.de>	Mon Jan 29 20:16:39 2018 +0100
committer	Akron <nils@diewald-online.de>	Mon Jan 29 20:16:39 2018 +0100
tree	347da39723fb071f577dfc6de81fe69e50a4bf84
parent	6727b21102e38f811475d8c1f6079ad2979473dd [diff]