Added treatment of CMC data
Change-Id: I43385a4666bf8469d2198127d01e22deaf2fc39a
diff --git a/MANIFEST b/MANIFEST
index 65f5f5a..679a38e 100755
--- a/MANIFEST
+++ b/MANIFEST
@@ -20,6 +20,7 @@
lib/KorAP/XML/Annotation/Base.pm
lib/KorAP/XML/Annotation/Base/Paragraphs.pm
lib/KorAP/XML/Annotation/Base/Sentences.pm
+lib/KorAP/XML/Annotation/CMC/Morpho.pm
lib/KorAP/XML/Annotation/Connexor/Sentences.pm
lib/KorAP/XML/Annotation/Connexor/Morpho.pm
lib/KorAP/XML/Annotation/Connexor/Phrase.pm
diff --git a/lib/KorAP/XML/Annotation/CMC/Morpho.pm b/lib/KorAP/XML/Annotation/CMC/Morpho.pm
new file mode 100644
index 0000000..94fd291
--- /dev/null
+++ b/lib/KorAP/XML/Annotation/CMC/Morpho.pm
@@ -0,0 +1,40 @@
+package KorAP::XML::Annotation::CMC::Morpho;
+use KorAP::XML::Annotation::Base;
+
+# Adds CMC part-of-speech (cmc/p:) and lemma (cmc/l:) terms per token.
+# Returns 1, or nothing if add_tokendata() returns a false value.
+sub parse {
+  my $self = shift;
+
+  $$self->add_tokendata(
+    foundry => 'cmc',
+    layer => 'morpho',
+    cb => sub {
+      my ($stream, $token) = @_;
+      my $mtt = $stream->pos($token->pos);
+
+      # Features are read from the nested fs > f > fs > f structure
+      my $content = $token->hash->{fs}->{f};
+      my $feats = $content->{fs}->{f};
+
+      # The outer f above is read as a hash, so a lone inner f may be one
+      # too (NOTE(review): confirm with the XML parser); accept both.
+      my @feats = ref($feats) eq 'ARRAY' ? @$feats
+                : ref($feats) eq 'HASH'  ? ($feats) : ();
+
+      foreach my $f (@feats) {
+        my $name  = $f->{-name};
+        my $found = $f->{'#text'} or next;  # nothing to index
+
+        $mtt->add(term => 'cmc/p:' . $found) if $name eq 'pos';
+        $mtt->add(term => 'cmc/l:' . $found) if $name eq 'lemma';
+      }
+    }) or return;
+  return 1;
+}
+
+sub layer_info {  # layer descriptors for the cmc/l and cmc/p terms
+  return [ qw(cmc/l=tokens cmc/p=tokens) ];
+}
+
+1;