Accept "pos" as an alias for ctag and default certainty to 1 for TreeTagger

Fixes CoNLL-U-TreeTagger compatibility.

Change-Id: I6301b3d826da8330ee33d83a286f765b08af04b6
diff --git a/Changes b/Changes
index 338a5d5..aac2050 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,9 @@
 0.53 2023-03-20
         - Added Spacy support. (kupietz)
+        - Support 'pos' as an alternative to 'ctag'
+          in TreeTagger. (kupietz)
+        - Change default certainty value in TreeTagger
+          to 1.
 
 0.52 2023-01-23
         - Introduced 'quiet' flag.
diff --git a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
index 81fc525..e66376c 100644
--- a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
@@ -26,7 +26,7 @@
         $content = $fs->{fs}->{f};
 
         my @val;
-        my $certainty = 0;
+        my $certainty = 1;
         foreach (@$content) {
           if ($_->{-name} eq 'certainty') {
 
@@ -54,7 +54,7 @@
           };
 
           # pos
-          if (($_->{-name} eq 'ctag') && ($found = $_->{'#text'})) {
+          if (($_->{-name} eq 'ctag' || $_->{-name} eq 'pos') && ($found = $_->{'#text'})) {
             $pos{$found} += $certainty // 1;
           };
         };
diff --git a/t/annotation/corpus/doc/0003/data.xml b/t/annotation/corpus/doc/0003/data.xml
new file mode 100644
index 0000000..a1dad20
--- /dev/null
+++ b/t/annotation/corpus/doc/0003/data.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="Corpus_Doc.0003" xmlns="http://ids-mannheim.de/ns/KorAP">
+  <metadata file="metadata.xml" />
+  <text>Zum letzten kulturellen Anlass lädt die Leitung des Schulheimes Hofbergli ein, bevor der Betrieb Ende Schuljahr eingestellt wird.</text>
+</raw_text>
diff --git a/t/annotation/corpus/doc/0003/header.xml b/t/annotation/corpus/doc/0003/header.xml
new file mode 100644
index 0000000..dd5c085
--- /dev/null
+++ b/t/annotation/corpus/doc/0003/header.xml
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="text" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+  <fileDesc>
+    <titleStmt>
+      <textSigle>Corpus/Doc.Text</textSigle>
+      <t.title assemblage="regular"/>
+    </titleStmt>
+    <publicationStmt>
+      <distributor/>
+      <pubAddress/>
+      <availability region="world" status="unknown"/>
+      <pubDate/>
+    </publicationStmt>
+    <sourceDesc>
+      <biblStruct>
+        <analytic>
+          <h.title type="main">Beispiel Text</h.title>
+	  <h.title type="sub">Beispiel Text Untertitel</h.title>
+          <h.author>Mustermann, Max</h.author>
+	  <editor>Monika Mustermann</editor>
+          <imprint/>
+          <biblScope type="pp"/>
+          <biblScope type="suppl"/>
+          <biblScope type="suppltitle"/>
+          <biblNote n="1"/>
+        </analytic>
+        <monogr>
+	  <h.title type="main">Beispiel Text</h.title>
+          <h.title type="sub">Best of!</h.title>
+          <h.author>Mustermann, Max</h.author>
+          <editor>Monika Mustermann</editor>
+          <imprint>
+            <publisher>Artificial articles Inc.</publisher>
+            <pubDate type="year">2001</pubDate>
+            <pubDate type="month">04</pubDate>
+            <pubDate type="day">02</pubDate>
+	    <pubPlace>Mannheim</pubPlace>
+          </imprint>
+          <biblScope type="issue"/>
+          <biblScope type="issueplace"/>
+        </monogr>
+      </biblStruct>
+      <reference type="complete" assemblage="regular"/>
+      <reference type="short" assemblage="regular"/>
+    </sourceDesc>
+  </fileDesc>
+  <profileDesc>
+    <creation>
+      <creatDate>1999.06.01</creatDate>
+    </creation>
+    <textClass>
+      <catRef n="1" target="topic.freizeit-unterhaltung.vereine-veranstaltungen" scheme="topic"/>
+      <h.keywords>
+        <keyTerm/>
+      </h.keywords>
+    </textClass>
+    <textDesc>
+      <textType>Zeitung: Tageszeitung</textType>
+      <textTypeArt>Bericht</textTypeArt>
+      <textDomain/>
+      <column/>
+    </textDesc>
+  </profileDesc>
+</idsHeader>
diff --git a/t/annotation/corpus/doc/0003/opennlp/tokens.xml b/t/annotation/corpus/doc/0003/opennlp/tokens.xml
new file mode 100644
index 0000000..a56e28c
--- /dev/null
+++ b/t/annotation/corpus/doc/0003/opennlp/tokens.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="Corpus_Doc.0003" VERSION="KorAP-0.4">
+<spanList>
+      <span id="s_7" from="0" to="3"/>
+      <span id="s_8" from="4" to="11"/>
+      <span id="s_9" from="12" to="23"/>
+      <span id="s_10" from="24" to="30"/>
+      <span id="s_11" from="31" to="35"/>
+      <span id="s_12" from="36" to="39"/>
+      <span id="s_13" from="40" to="47"/>
+      <span id="s_14" from="48" to="51"/>
+      <span id="s_15" from="52" to="63"/>
+      <span id="s_16" from="64" to="73"/>
+      <span id="s_17" from="74" to="77"/>
+      <span id="s_18" from="77" to="78"/>
+      <span id="s_19" from="79" to="84"/>
+      <span id="s_20" from="85" to="88"/>
+      <span id="s_21" from="89" to="96"/>
+      <span id="s_22" from="97" to="101"/>
+      <span id="s_23" from="102" to="111"/>
+      <span id="s_24" from="112" to="123"/>
+      <span id="s_25" from="124" to="128"/>
+      <span id="s_26" from="128" to="129"/>
+   </spanList>
+</layer>
diff --git a/t/annotation/corpus/doc/0003/tree_tagger/morpho.xml b/t/annotation/corpus/doc/0003/tree_tagger/morpho.xml
new file mode 100644
index 0000000..50228ea
--- /dev/null
+++ b/t/annotation/corpus/doc/0003/tree_tagger/morpho.xml
@@ -0,0 +1,206 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="Corpus_Doc.0003" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<spanList>
+  <span id="s1_n1" from="0" to="3">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">APPRART</f>
+      <f name="lemma">zu+die</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s1_n2" from="4" to="11">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">ADJA</f>
+      <f name="lemma">letzt</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s1_n3" from="12" to="23">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">ADJA</f>
+      <f name="lemma">kulturell</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s1_n4" from="24" to="30">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">NN</f>
+      <f name="lemma">Anlass</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s2_n1" from="31" to="35">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">VVFIN</f>
+      <f name="lemma">laden</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s2_n2" from="36" to="39">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">ART</f>
+      <f name="lemma">die</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s2_n3" from="" to="">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">NN</f>
+      <f name="lemma">Leitung</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n1" from="48" to="51">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">ART</f>
+      <f name="lemma">die</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n2" from="52" to="63">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">NN</f>
+      <f name="lemma">Schulheim</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n3" from="64" to="73">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">NN</f>
+      <f name="lemma">&lt;unknown&gt;</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n4" from="74" to="77">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">PTKVZ</f>
+      <f name="lemma">ein</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n5" from="77" to="78">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">$,</f>
+      <f name="lemma">,</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n6" from="79" to="84">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">KOUS</f>
+      <f name="lemma">bevor</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n7" from="85" to="88">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">ART</f>
+      <f name="lemma">die</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n8" from="89" to="96">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">NN</f>
+      <f name="lemma">Betrieb</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n9" from="97" to="101">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">NN</f>
+      <f name="lemma">Ende</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n10" from="102" to="111">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">NN</f>
+      <f name="lemma">Schuljahr</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n11" from="112" to="123">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">VVPP</f>
+      <f name="lemma">einstellen</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s3_n12" from="124" to="128">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">VAFIN</f>
+      <f name="lemma">werden</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+  <span id="s4_n1" from="48" to="51">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">$.</f>
+      <f name="lemma">.</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+ </spanList>
+</layer>
diff --git a/t/annotation/corpus/doc/0003/tree_tagger/tokens.xml b/t/annotation/corpus/doc/0003/tree_tagger/tokens.xml
new file mode 100644
index 0000000..a56e28c
--- /dev/null
+++ b/t/annotation/corpus/doc/0003/tree_tagger/tokens.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="Corpus_Doc.0003" VERSION="KorAP-0.4">
+<spanList>
+      <span id="s_7" from="0" to="3"/>
+      <span id="s_8" from="4" to="11"/>
+      <span id="s_9" from="12" to="23"/>
+      <span id="s_10" from="24" to="30"/>
+      <span id="s_11" from="31" to="35"/>
+      <span id="s_12" from="36" to="39"/>
+      <span id="s_13" from="40" to="47"/>
+      <span id="s_14" from="48" to="51"/>
+      <span id="s_15" from="52" to="63"/>
+      <span id="s_16" from="64" to="73"/>
+      <span id="s_17" from="74" to="77"/>
+      <span id="s_18" from="77" to="78"/>
+      <span id="s_19" from="79" to="84"/>
+      <span id="s_20" from="85" to="88"/>
+      <span id="s_21" from="89" to="96"/>
+      <span id="s_22" from="97" to="101"/>
+      <span id="s_23" from="102" to="111"/>
+      <span id="s_24" from="112" to="123"/>
+      <span id="s_25" from="124" to="128"/>
+      <span id="s_26" from="128" to="129"/>
+   </spanList>
+</layer>
diff --git a/t/annotation/tt_morpho.t b/t/annotation/tt_morpho.t
index 705f7d0..0544394 100644
--- a/t/annotation/tt_morpho.t
+++ b/t/annotation/tt_morpho.t
@@ -42,8 +42,40 @@
 is($data->{stream}->[11]->[6], 'tt/p:PTKVZ$<b>129<b>51',
    'Lemma');
 
+is(scalar(@{$data->{stream}}), 18);
+
+
+ok($tokens = TestInit::tokens('0003'), 'Parse tokens');
+
+ok($tokens->add('TreeTagger', 'Morpho'), 'Add Structure');
+
+$data = $tokens->to_data->{data};
+
+like($data->{foundries}, qr!treetagger/morpho!, 'data');
+like($data->{layerInfos}, qr!tt/p=tokens!, 'data');
+like($data->{layerInfos}, qr!tt/l=tokens!, 'data');
+
+is($data->{stream}->[0]->[5], 'tt/l:zu+die', 'POS');
+is($data->{stream}->[0]->[6], 'tt/p:APPRART', 'POS');
+
+is($data->{stream}->[3]->[3], 'tt/l:Anlass', 'POS');
+is($data->{stream}->[3]->[4], 'tt/p:NN', 'POS');
+
+is($data->{stream}->[10]->[3], 'tt/l:ein', 'POS');
+is($data->{stream}->[10]->[4], 'tt/p:PTKVZ', 'POS');
+
+is($data->{stream}->[13]->[3], 'tt/l:Betrieb', 'POS');
+
+is($data->{stream}->[-1]->[3], 'tt/l:werden', 'POS');
+is($data->{stream}->[-1]->[4], 'tt/p:VAFIN', 'POS');
+
+is($data->{stream}->[11]->[3], 'tt/l:bevor',
+   'Lemma');
+is($data->{stream}->[11]->[4], 'tt/p:KOUS',
+   'Lemma');
+ok(!$data->{stream}->[11]->[6], 'No alternatives');
+
+is(scalar(@{$data->{stream}}), 18);
 
 done_testing;
-
 __END__
-
diff --git a/t/script/archive.t b/t/script/archive.t
index 41389bb..b0244ca 100644
--- a/t/script/archive.t
+++ b/t/script/archive.t
@@ -103,15 +103,20 @@
   # That's not really stable on slow machines!
   my $out = stdout_from(sub { system($call); });
 
-  ok($out =~ m!\[\$(\d+?):1\/2\]!s, $call . ' pid 1');
+  ok($out =~ m!\[\$(\d+?):1\/3\]!s, $call . ' pid 1');
   my $pid1 = $1;
-  ok($out =~ m!\[\$(\d+?):2\/2\]!s, $call . ' pid 2');
+  ok($out =~ m!\[\$(\d+?):2\/3\]!s, $call . ' pid 2');
   my $pid2 = $1;
+  ok($out =~ m!\[\$(\d+?):3\/3\]!s, $call . ' pid 3');
+  my $pid3 = $1;
 
   isnt($pid1, $pid2, 'No PID match');
+  isnt($pid2, $pid3, 'No PID match');
+  isnt($pid1, $pid3, 'No PID match');
 
   ok($out =~ m!Processed .+?\/corpus-doc-0001\.json!s, $call);
   ok($out =~ m!Processed .+?\/corpus-doc-0002\.json!s, $call);
+  ok($out =~ m!Processed .+?\/corpus-doc-0003\.json!s, $call);
 
   ok(-d $output, 'Temporary directory still exists');
   my $json_1 = catfile($output, 'corpus-doc-0001.json');