Fix certainty values in TreeTagger output

Change-Id: Iec3833e9c13d945caf871f5f145f6ee2860ac9a0
diff --git a/Changes b/Changes
index 830c673..094ad01 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,6 @@
-0.33 2018-01-16
+0.33 2018-02-01
         - Added LWC support.
+        - Fixed TreeTagger certainties.
 
 0.32 2017-10-24
         - Fixed tar building process in script.
diff --git a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
index 15a3947..54d2436 100644
--- a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
@@ -18,6 +18,9 @@
 
       $content = ref $content ne 'ARRAY' ? [$content] : $content;
 
+      my (%lemma, %pos) = ();
+
+      # Iterate over feature structures
       foreach my $fs (@$content) {
         $content = $fs->{fs}->{f};
 
@@ -25,8 +28,7 @@
         my $certainty = 0;
         foreach (@$content) {
           if ($_->{-name} eq 'certainty') {
-            $certainty = floor(($_->{'#text'} * 255));
-            $certainty = $certainty if $certainty;
+            $certainty = $_->{'#text'};
           }
           else {
             push @val, $_
@@ -40,31 +42,42 @@
                 ($found = $_->{'#text'}) &&
                 ($found ne 'UNKNOWN') &&
                 ($found ne '?')) {
-            my %term = (
-              term => 'tt/l:' . $found
-            );
-
-            # Ignore certainty for lemma
-            # if ($certainty) {
-            #   $term{pti} = 129;
-            #   $term{payload} = '<b>' . $certainty;
-            # };
-            $mtt->add(%term);
+            $lemma{$found} += $certainty // 1;
           };
 
           # pos
           if (($_->{-name} eq 'ctag') && ($found = $_->{'#text'})) {
-            my %term = (
-              term => 'tt/p:' . $found
-            );
-            if ($certainty) {
-              $term{pti} = 129;
-              $term{payload} = '<b>' . $certainty;
-            };
-            $mtt->add(%term);
+
+            $pos{$found} += $certainty // 1;
           };
         };
       };
+
+      # Lemma terms: byte payload only for a summed certainty in (0, 1)
+      foreach (sort keys %lemma) {
+        if ($lemma{$_} > 0 && $lemma{$_} < 1) {
+          $mtt->add(
+            term => 'tt/l:' . $_,
+            pti => 129,
+            payload => '<b>' . floor(($lemma{$_} * 255))
+          );
+        } else {
+          $mtt->add(term => 'tt/l:' . $_);
+        };
+      };
+      # POS terms: same rule; a sum of 0 (no certainty given) gets no payload
+      foreach (sort keys %pos) {
+        if ($pos{$_} > 0 && $pos{$_} < 1) {
+          $mtt->add(
+            term => 'tt/p:' . $_,
+            pti => 129,
+            payload => '<b>' . floor(($pos{$_} * 255))
+          );
+        } else {
+          $mtt->add(term => 'tt/p:' . $_);
+        };
+      };
+
     }) or return;
 
   return 1;
diff --git a/t/annotation/corpus/doc/0001/tree_tagger/morpho.xml b/t/annotation/corpus/doc/0001/tree_tagger/morpho.xml
index 7026e47..983ef05 100644
--- a/t/annotation/corpus/doc/0001/tree_tagger/morpho.xml
+++ b/t/annotation/corpus/doc/0001/tree_tagger/morpho.xml
@@ -140,7 +140,7 @@
         <f name="lex">
           <fs>
             <f name="lemma">bevor</f>
-            <f name="certainty">0.999168</f>
+            <f name="certainty">0.8</f>
             <f name="ctag">KOUS</f>
           </fs>
         </f>
@@ -149,10 +149,19 @@
         <f name="lex">
           <fs>
             <f name="lemma">bevor</f>
-            <f name="certainty">0.225048</f>
+            <f name="certainty">0.1</f>
             <f name="ctag">PTKVZ</f>
           </fs>
         </f>
+
+        <f name="lex">
+          <fs>
+            <f name="lemma">bevora</f>
+            <f name="certainty">0.1</f>
+            <f name="ctag">PTKVZ</f>
+          </fs>
+        </f>
+
       </fs>
     </span>
     <span from="85" id="s_20" to="88">
diff --git a/t/annotation/tt_morpho.t b/t/annotation/tt_morpho.t
index 726edce..cf97fa6 100644
--- a/t/annotation/tt_morpho.t
+++ b/t/annotation/tt_morpho.t
@@ -19,19 +19,25 @@
 like($data->{layerInfos}, qr!tt/l=tokens!, 'data');
 
 is($data->{stream}->[0]->[5], 'tt/l:zum', 'POS');
-is($data->{stream}->[0]->[6], 'tt/p:APPRART$<b>129<b>255', 'POS');
+is($data->{stream}->[0]->[6], 'tt/p:APPRART', 'POS');
 
 is($data->{stream}->[3]->[3], 'tt/l:Anlaß', 'POS');
-is($data->{stream}->[3]->[4], 'tt/p:NN$<b>129<b>255', 'POS');
+is($data->{stream}->[3]->[4], 'tt/p:NN', 'POS');
 
-is($data->{stream}->[10]->[3], 'tt/l:ein', 'POS');
+is($data->{stream}->[10]->[3], 'tt/l:ein$<b>129<b>253', 'POS');
 is($data->{stream}->[10]->[4], 'tt/p:PTKVZ$<b>129<b>253', 'POS');
 
 is($data->{stream}->[-1]->[3], 'tt/l:werden', 'POS');
-is($data->{stream}->[-1]->[4], 'tt/p:VAFIN$<b>129<b>255', 'POS');
+is($data->{stream}->[-1]->[4], 'tt/p:VAFIN', 'POS');
 
-is($data->{stream}->[11]->[3], 'tt/l:bevor', 'Lemma');
-is($data->{stream}->[11]->[4], 'tt/p:KOUS$<b>129<b>254', 'Lemma');
+is($data->{stream}->[11]->[3], 'tt/l:bevor$<b>129<b>229',
+   'Lemma');
+is($data->{stream}->[11]->[4], 'tt/l:bevora$<b>129<b>25',
+   'Lemma');
+is($data->{stream}->[11]->[5], 'tt/p:KOUS$<b>129<b>204',
+   'POS');
+is($data->{stream}->[11]->[6], 'tt/p:PTKVZ$<b>129<b>51',
+   'POS');
 
 
 done_testing;
diff --git a/t/real/rei.t b/t/real/rei.t
index 2a3511e..0d4d62c 100644
--- a/t/real/rei.t
+++ b/t/real/rei.t
@@ -220,7 +220,7 @@
 is('i:rede', $first->[26]);
 is('opennlp/p:NN', $first->[27]);
 is('s:Rede', $first->[28]);
-is('tt/l:Rede', $first->[29]);
+is('tt/l:Rede$<b>129<b>253', $first->[29]);
 is('tt/p:NN$<b>129<b>253', $first->[30]);
 
 my $last = $output->{data}->{stream}->[-1];
@@ -234,7 +234,7 @@
 is('opennlp/p:NE', $last->[6]);
 is("s:Schr\x{f6}der", $last->[7]);
 is("tt/l:Schr\x{f6}der", $last->[8]);
-is('tt/p:NE$<b>129<b>255', $last->[9]);
+is('tt/p:NE', $last->[9]);
 
 
 # REI/BNG/00071
@@ -309,7 +309,7 @@
 is('opennlp/p:NE', $first->[29]);
 is('s:Christine', $first->[30]);
 is('tt/l:Christine', $first->[31]);
-is('tt/p:NE$<b>129<b>255', $first->[32]);
+is('tt/p:NE', $first->[32]);
 
 $last = $output->{data}->{stream}->[-1];
 is('<>:dereko/s:back$<b>65<i>7008<i>7008<i>1008<b>1', $last->[0]);
diff --git a/t/transform.t b/t/transform.t
index 57d34c0..acb314a 100644
--- a/t/transform.t
+++ b/t/transform.t
@@ -161,9 +161,9 @@
   '<>:cnx/c:np$<b>64<i>0<i>1<i>1<b>0|'.
   '<>:cnx/s:s$<b>64<i>0<i>74<i>13<b>0|'.
   '-:cnx/sentences$<i>63|'.
-  'tt/l:A|'.
+#  'tt/l:A|'.
   'tt/p:NN$<b>129<b>199|'.
-  'tt/l:A|'.
+  'tt/l:A$<b>129<b>253|'.
   'tt/p:FM$<b>129<b>54|'.
   '<>:tt/s:s$<b>64<i>0<i>6083<i>923<b>0|'.
   '-:tt/sentences$<i>1|'.