textHeader, corpusHeader
diff --git a/vrt2tei.pl b/vrt2tei.pl
index 67227d2..dd1cffa 100755
--- a/vrt2tei.pl
+++ b/vrt2tei.pl
@@ -18,7 +18,7 @@
# TODO:
# 1 insert dtd spec, or ref to TEI
-# 3a UPLOAD in GITHUB
+# 3a remove the vrt positional attribute comment line / all comment lines
# 3b add @head and @deprel to I5 sowie auch @msd
# 3c bearbeitung von @head und @deprel in tei2korapxml durch Nils?
# 3d build 30 billion corpus
@@ -28,7 +28,6 @@
# 5 abfangen von unerwarteten elementen dh andere als <sentence> und <paragraph>
# 5a wort reihenfolge nochmal checken
# 6 checks and balances
-# 6a output nach stdout machen
# 7 How to encode Kielipankki and National Library of Finland? in teiCorpus Header
# 8 construct <idsDoc>s for the months (or go for TEI)
# 9 parallelisation in bash and application on sub corpora of KLK
@@ -64,6 +63,8 @@
use Time::Piece;
use Tie::IxHash;
+
+
#----------------------
# check file arguments:
#----------------------
@@ -83,30 +84,75 @@
my $textcounter = 0;
+
+my $twig="";
+my $teiCorpusHeaderDoc="";
+
+
+#------------------------------------------------------------------
+# read corpusHeaderSkeleton document and get header out of it
+#------------------------------------------------------------------
+
+my $teiCorpusHeaderDocTwig = XML::Twig->new(
+ keep_spaces => 1,
+ keep_atts_order => 1,
+ comments => 'drop',
+ );
+
+# NOTE(review): both skeleton file names are relative, i.e. they are resolved against the current working directory
+$teiCorpusHeaderDocTwig->parsefile("teiCorpusHeaderSkeleton.tei.xml");
+my $corpusHeader = $teiCorpusHeaderDocTwig->root; # getting the teiHeader for corpus out of the teiCorpusHeaderSkeleton document
+
+
+#------------------------------------------------------------------
+# read textHeaderSkeleton document and get header out of it
+#------------------------------------------------------------------
+
+my $teiTextHeaderDocTwig = XML::Twig->new(
+ keep_spaces => 1,
+ keep_atts_order => 1,
+ comments => 'drop',
+ );
+
+$teiTextHeaderDocTwig->parsefile("teiTextHeaderSkeleton.tei.xml");
+my $textHeader = $teiTextHeaderDocTwig->root; # getting the teiHeader for a text out of the teiTextHeaderSkeleton document
+
+
+#----------------------------------
+# read input VRT-XML document
+#----------------------------------
+
+open(my $IN, "<", $ARGV[0]) || die("$0: cannot open file for reading: $ARGV[0]: $!"); # 3-arg open: file name is never parsed for a mode; the filehandle itself does not seem to be needed,
+ # as parsefile() (see below) is applied to the filename, but the script still stops here if the file cannot be read
+
+#-----------------------------------------------------
+# global variables pertaining to the original corpus
+#-----------------------------------------------------
+
+my $kielipankkiCorpus = "klk-fi-v2-vrt";
+
+
+
+
#####################
# M A I N
#####################
-open(my $IN, "< $ARGV[0]") || die("$0: cannot open file for reading: $ARGV[0]"); # open input file and initialise filehandel, actually does not seem to be needed
- # as parsefile() (s.b.) is applied to the filename
-## open(my $OUT, "> $ARGV[1]") || die("$0: cannot open file: $ARGV[1]"); # open result file and initialise filehandle
- # currently not used but flushed to "/dev/stdout"
+#-------------------------------------------------------------------------------------------------------------
+# start twig for input and call start tag handler for root and twig handler for each <text> in the VRT
+#-------------------------------------------------------------------------------------------------------------
-#-----------------------------------------------------------------------------------
-# start twig and call start tag handler for root and twig handler for each <text>
-#-----------------------------------------------------------------------------------
-
-my $twig="";
$twig = new XML::Twig(
keep_spaces => 1, # dadurch auch whitespaces an ehemeligen elementgrenzen im output
keep_atts_order => 1, # requires Tie::IxHash
- pretty_print => 'indented',
+ comments => 'drop',
start_tag_handlers => {
- texts => \&root
+ texts => sub{root(@_, $corpusHeader)}
},
twig_handlers => {
- text => \&text
+# text => \&text
+ text => sub{text(@_, $textHeader)}
},
# dtd_handlers => { # ToDo for I5
# \&set_dtd;
@@ -117,7 +163,8 @@
$twig->parsefile($ARGV[0]);
-##TMP close($OUT);
+
+
###########
@@ -141,53 +188,54 @@
sub root {
- my ($twig, $root) =@_;
+ my ($twig, $root, $corpusHeader) =@_; # $corpusHeader is appended to the handler arguments by the closure registered for <texts>
$root->set_gi('teiCorpus');
$root->set_att("xmlns", 'http://www.tei-c.org/ns/1.0');
- &insertCorpusHeader($root);
+ &insertCorpusHeader($root, $corpusHeader); # fills the header skeleton and pastes it into the renamed root element
}
+
sub insertCorpusHeader{
- my ($root) =@_;
+ my ($root, $corpusHeader) =@_;
+ #---------------------------------------------------------------------------
+ # fill the corpus header skeleton ($corpusHeader) with metadata derived from the
+ # input file name $ARGV[0] (source and year) and paste it into $root as first child
+ #---------------------------------------------------------------------------
my @array = split(/\//, $ARGV[0]);
my $l = scalar(@array);
my $source = $array[$l-1];
- $source =~ s/\.xml//;
- $source = $source . " from klk-fi-v2-vrt"; # for the time being; TODO
-
- my $teiHeader = $root ->insert_new_elt("first_child", 'teiHeader');
- my $fileDesc = $teiHeader ->insert_new_elt("last_child", 'fileDesc');
- my $profileDesc = $teiHeader ->insert_new_elt("last_child", 'profileDesc');
-
- my $titleStmt = $fileDesc ->insert_new_elt("last_child", 'titleStmt');
- my $title = $titleStmt ->insert_new_elt("last_child", 'title');
- $title ->set_text($source . " from KLK-fi-2021 for EuReCo");
-
- my $publicationStmt = $fileDesc ->insert_new_elt("last_child", 'publicationStmt');
- my $distributor = $publicationStmt->insert_new_elt("last_child", 'distributor');
- $distributor ->set_text("NOT FOR DISTRIBUTION - to be used locally in EuReCo");
+ my $yy = ($source =~ s/([0-9][0-9][0-9][0-9])\.xml$//) ? $1 : ""; # strip trailing "YYYY.xml"; $1 is only trusted if the substitution matched
- my $sourceDesc = $fileDesc ->insert_new_elt("last_child", 'sourceDesc');
- my $bibl = $sourceDesc ->insert_new_elt("last_child", 'bibl');
- $bibl ->set_text($source);
+ my $language="Finnish";
+ my $lang_tla="fi";
+
+ warn("$0: no 4-digit year before '.xml' in file name: $ARGV[0]\n") if ($yy eq ""); # without this check a stale or undefined $1 would end up in the header
- my $langUsage = $profileDesc ->insert_new_elt("last_child", 'langUsage');
- my $language = $langUsage ->insert_new_elt("last_child", 'language');
- $language ->set_att("ident",'fi');
- $language ->set_text("Finnish");
+ my $ctitle = $source . " " . $yy . ", from ". $kielipankkiCorpus . " for EuReCo"; # to do: also get name of corpus (klk-fi-v2-vrt)
+
+
+ #-----------------------
+ # set corpus header
+ #-----------------------
+
+ &set_title( $corpusHeader, $source, $yy, $kielipankkiCorpus);
+ &set_sourceDesc($corpusHeader, $source, $yy, $kielipankkiCorpus);
+
+ my $teiCorpusHeader = $corpusHeader->paste("first_child", $root);
+
}
-
+
#----------------------------
# handler &text for <text>
#----------------------------
sub text {
- my ($twig, $text) = @_;
+ my ($twig, $text, $textHeader) = @_;
$textcounter++; # global variable
@@ -199,7 +247,8 @@
my $textattsref = $text->atts(); # $textattsref is now a reference to a hash and should be used with '->'
- &createTextHeader($text, $textattsref);
+
+ &createTextHeader($text, $textattsref, $textHeader);
#--------------------------
# create <TEI> from <text>
@@ -266,13 +315,13 @@
} # end sentences
} # end paragraphs
- $twig->set_pretty_print( 'record');
+ # $twig->set_pretty_print( 'record');
# $twig->flush($OUT);
$twig->flush("/dev/stdout");
}
sub createTextHeader{
- my ($text, $textattsref) = @_;
+ my ($text, $textattsref, $textHeader) = @_;
# USE 01 binding_id="2246025"
# USE 02 date="2021-01-15"
@@ -332,140 +381,208 @@
my @namearray = split(/[\.\/]/, $ORIGFILENAME); # use $namearray[4] as ID for the page
+
#-----------------------------------------------------------------------
- # CREATE text-teiHeader ACCORDING TO THE SKELETON in klk-header.tei.xml
+ # CREATE text-teiHeader ACCORDING TO THE SKELETON in $textHeader
#-----------------------------------------------------------------------
- # create <teiHeader> inside <TEI>
- my $teiHeader = XML::Twig::Elt->new('teiHeader');
- $teiHeader->paste('first_child', $text);
- ## insert_new_elt is a combo of new and paste, cf. xml::twig docu:
- ## insert_new_elt ($opt_position, $gi, $opt_atts_hashref, @opt_content)
+ $textHeader->paste('first_child', $text);
+
+ #-----------------------------------------------
+ # <teiHeader>
+ # <fileDesc n="[EuReCo-KLK-FIN_$ID]">
+ # <titleStmt>
+ # <title>[$LABEL, page $PAGENO]</title>
+
+ $textHeader->first_child("fileDesc")->first_child("titleStmt")->first_child("title")
+ ->set_text($LABEL . ", Text #" . $textcounter); # Case KLK
+
+ #-----------------------------------------------
+ # <fileDesc>
+ # <sourceDesc>
+ # <biblStruct>
+ # <analytic>
+ # <title type="main">[$LABEL, page $PAGENO]</title>
+ # <date>[$DATE]</date>
+ # <date type="year">TODO</date>
+ # <date type="month">TODO</date>
+ # <date type="day">TODO</date>
+ # <idno type="PAGEID">$PAGEID</idno>
+ # <idno type="BINDINGID">$BID</idno>
+ # <idno type="ID">$ID</idno>
+ # <idno type="KIELIPANKKI_METAFILENAME">$METAFILENAME</idno>
+ # <idno type="KIELIPANKKI_ORIGFILENAME">$ORIGFILENAME</idno>
+ # <textLang>$LANGUAGE</textLang>
+ # </analytic>
+
+ my $analytic = $textHeader->get_xpath("./fileDesc/sourceDesc/biblStruct/analytic", 0);
+
+ $analytic->first_child("title") ->set_text($LABEL . ", Text #" . $textcounter); # Case KLK
+ $analytic->get_xpath('./date[@type="year"]', 0) ->set_text($datearray[0]);
+ $analytic->get_xpath('./date[@type="month"]', 0) ->set_text($datearray[1]);
+ $analytic->get_xpath('./date[@type="day"]', 0) ->set_text($datearray[2]);
+ $analytic->get_xpath('./idno[@type="PAGEID"]', 0) ->set_text($PAGEID);
+ $analytic->get_xpath('./idno[@type="BINDINGID"]', 0) ->set_text($BID);
+ $analytic->get_xpath('./idno[@type="ID"]', 0) ->set_text($ID);
+ $analytic->get_xpath('./idno[@type="KIELIPANKKI_METAFILENAME"]', 0) ->set_text($METAFILENAME);
+ $analytic->get_xpath('./idno[@type="KIELIPANKKI_ORIGFILENAME"]', 0) ->set_text($ORIGFILENAME);
+ $analytic->first_child('textLang') ->set_text($LANGUAGE);
- my $fileDesc = $teiHeader->insert_new_elt('fileDesc' => {n => "EuReCo_KLK-fi_" . $namearray[4]});
- my $encodingDesc = $teiHeader->insert_new_elt("last_child", 'encodingDesc');
- my $profileDesc = $teiHeader->insert_new_elt("last_child", 'profileDesc');
- my $revisionDesc = $teiHeader->insert_new_elt("last_child", 'revisionDesc');
+ # <monogr>
+ # <title>$PUBLTITLE</title>
+ # <imprint>
+ # <pubPlace>TODO</pubPlace>
+ # <publisher>TODO</publisher>
+ # </imprint>
+ # <biblScope unit="ISSUETITLE"/>
+ # <biblScope unit="ISSUENO"/>
+ # <biblScope unit="ISSUEDATE"/>
+ # <biblScope unit="pp">$PAGENO</biblScope>
+ # </monogr>
- #---------------------
- # fileDesc/titleStmt
- #---------------------
- my $titleStmt = $fileDesc ->insert_new_elt('titleStmt');
- my $title = $titleStmt->insert_new_elt("last_child", 'title');
- my $respStmt = $titleStmt->insert_new_elt("last_child", 'respStmt');
- my $resp = $respStmt ->insert_new_elt("last_child", 'resp');
- my $name = $respStmt ->insert_new_elt("last_child", 'name');
+ my $monogr = $textHeader->get_xpath("./fileDesc/sourceDesc/biblStruct/monogr", 0);
- # set texts for titleStmt
- # $title->set_text($LABEL . ", page " . $PAGENO); # Achtung - PAGENO scheint meist "None" zu sein
- $title->set_text($LABEL . ", Text #" . $textcounter); # at least for Suomen Kuvalehti
- $resp ->set_text("compiled by EuReCo");
- $name ->set_text("EuReCo: HL");
- #--------------------------
- # fileDesc/publicationStmt
- #--------------------------
- my $publicationStmt = $fileDesc ->insert_new_elt("last_child", 'publicationStmt');
- my $distributor = $publicationStmt->insert_new_elt("last_child", 'distributor');
- my $note = $distributor ->insert_new_elt("last_child", 'note');
- my $availability = $publicationStmt->insert_new_elt("last_child", 'availability');
- my $licence = $availability ->insert_new_elt("last_child", 'licence');
- # set texts for publicationStmt
- $note ->set_text("NOT FOR DISTRIBUTION - to be used locally in EuReCo");
- $licence->set_text("CLARIN-RES"); # TODO: Ausfuherlichere Licence info in KLK Metadata Record
- #------------------------------
- # fileDesc/sourceDesc/biblStruct
- #------------------------------
- my $sourceDesc = $fileDesc ->insert_new_elt("last_child", 'sourceDesc');
- my $biblStruct = $sourceDesc->insert_new_elt("last_child", 'biblStruct');
-
- # fileDesc/sourceDesc/biblStruct/analytic
- my $analytic = $biblStruct->insert_new_elt("last_child", 'analytic');
- my $analytic_title = $analytic->insert_new_elt("last_child", 'title' => {type => "main"} );
-# my $analytic_date = $analytic->insert_new_elt("last_child", 'date');
- my $analytic_date_year = $analytic->insert_new_elt("last_child", 'date' => {type => "year"});
- my $analytic_date_month = $analytic->insert_new_elt("last_child", 'date' => {type => "month"});
- my $analytic_date_day = $analytic->insert_new_elt("last_child", 'date' => {type => "day"});
- my $analytic_idno_pageid = $analytic->insert_new_elt("last_child", 'idno' => {type => "PAGEID"});
- my $analytic_idno_bindingid = $analytic->insert_new_elt("last_child", 'idno' => {type => "BINDINGID"});
- my $analytic_idno_id = $analytic->insert_new_elt("last_child", 'idno' => {type => "ID"});
- my $analytic_idno_metafile = $analytic->insert_new_elt("last_child", 'idno' => {type => "KIELIPANKKI_METAFILENAME"});
- my $analytic_idno_origfile = $analytic->insert_new_elt("last_child", 'idno' => {type => "KIELIPANKKI_ORIGFILENAME"});
- my $analytic_textlang = $analytic->insert_new_elt("last_child", 'textLang');
-
- # set texts for analytic
-# $analytic_title ->set_text($LABEL . ", page " . $PAGENO); # Achtung $PAGENO scheint meist "None zu sein"
- $analytic_title ->set_text($LABEL . ", Text #" . $textcounter); # Achtung $PAGENO scheint meist "None zu sein"
-# $analytic_date ->set_text($DATE);
- $analytic_date_year ->set_text($datearray[0]);
- $analytic_date_month ->set_text($datearray[1]);
- $analytic_date_day ->set_text($datearray[2]);
- $analytic_idno_pageid ->set_text($PAGEID);
- $analytic_idno_bindingid->set_text($BID);
- $analytic_idno_id ->set_text($ID);
- $analytic_idno_metafile ->set_text($METAFILENAME);
- $analytic_idno_origfile ->set_text($ORIGFILENAME);
- $analytic_textlang ->set_text($LANGUAGE);
-
- #-------------------------------------
- # fileDesc/sourceDesc/biblStruct/monogr
- #-------------------------------------
- my $monogr = $biblStruct->insert_new_elt("last_child", 'monogr');
- my $monogr_title = $monogr ->insert_new_elt("last_child", 'title');
- my $imprint = $monogr ->insert_new_elt("last_child", 'imprint'); # imprint is needed for valididty
- my $pubPlace = $imprint ->insert_new_elt("last_child", 'pubPlace'); # imprint is needed for validity
- my $publisher = $imprint ->insert_new_elt("last_child", 'publisher'); # imprint is needed for validity
- my $biblScope_issuetitle = $monogr ->insert_new_elt("last_child", 'biblScope' => {unit => 'ISSUETITLE'} );
- my $biblScope_issueno = $monogr ->insert_new_elt("last_child", 'biblScope' => {unit => 'ISSUENO'} );
- my $biblScope_issuedate = $monogr ->insert_new_elt("last_child", 'biblScope' => {unit => 'ISSUEDATE'} );
- my $biblScope_pp = $monogr ->insert_new_elt("last_child", 'biblScope' => {unit => 'PAGENO'} ); # Achtung PAGENO ist meist "None" ?
-
- # set texts for monogr
- $monogr_title ->set_text($PUBLTITLE);
- $pubPlace ->set_text("TODO");
- $pubPlace ->set_att("key",'FI');
- $publisher ->set_text("TODO");
- $biblScope_issuetitle->set_text($ISSUETITLE);
- $biblScope_issueno ->set_text($ISSUENO);
- $biblScope_issuedate ->set_text($ISSUEDATE);
- $biblScope_pp ->set_text($PAGENO);
-
- #---------------
- # encodingDesc
- #---------------
- my $tagsDecl = $encodingDesc->insert_new_elt("last_child", 'tagsDecl');
- my $namespace = $tagsDecl ->insert_new_elt("last_child", 'namespace' => {name => 'http://www.tei-c.org/ns/1.0'});
- my $tagUsage_s = $namespace ->insert_new_elt("last_child", 'tagUsage' => {gi => 's', occurs => $SENTCOUNT});
- my $tagUsage_w = $namespace ->insert_new_elt("last_child", 'tagUsage' => {gi => 'w', occurs => $TOKENCOUNT});
- #-------------
- # profileDesc
- #-------------
- my $langUsage = $profileDesc ->insert_new_elt("last_child", 'langUsage');
- my $language = $langUsage ->insert_new_elt("last_child", 'language' => {ident => $LANGUAGE, usage => $SUMLANG});
- # Achtung in @usage muss eigt. ein integer; am besten inhalt von SUMLANG aufdroeseln und mehrere <language> machen
- my $textClass = $profileDesc ->insert_new_elt("last_child", 'textClass');
- my $classCode_fi = $textClass ->insert_new_elt("last_child", 'classCode' => {scheme => "KLK_PUBLTYPE"});
-# my $classCode_en = $textClass ->insert_new_elt("last_child", 'classCode' => {scheme => "KLK_PUBLTYPE_MAPPED"});
+##TMP # create <teiHeader> inside <TEI>
+##TMP my $teiHeader = XML::Twig::Elt->new('teiHeader');
+##TMP # $teiHeader->paste('first_child', $text);
+##TMP
+##TMP ## insert_new_elt is a combo of new and paste, cf. xml::twig docu:
+##TMP ## insert_new_elt ($opt_position, $gi, $opt_atts_hashref, @opt_content)
+##TMP
+##TMP my $fileDesc = $teiHeader->insert_new_elt('fileDesc' => {n => "EuReCo_KLK-fi_" . $namearray[4]});
+##TMP my $encodingDesc = $teiHeader->insert_new_elt("last_child", 'encodingDesc');
+##TMP my $profileDesc = $teiHeader->insert_new_elt("last_child", 'profileDesc');
+##TMP my $revisionDesc = $teiHeader->insert_new_elt("last_child", 'revisionDesc');
+##TMP
+##TMP #---------------------
+##TMP # fileDesc/titleStmt
+##TMP #---------------------
+##TMP my $titleStmt = $fileDesc ->insert_new_elt('titleStmt');
+##TMP my $title = $titleStmt->insert_new_elt("last_child", 'title');
+##TMP my $respStmt = $titleStmt->insert_new_elt("last_child", 'respStmt');
+##TMP my $resp = $respStmt ->insert_new_elt("last_child", 'resp');
+##TMP my $name = $respStmt ->insert_new_elt("last_child", 'name');
+##TMP
+##TMP # set texts for titleStmt
+##TMP # $title->set_text($LABEL . ", page " . $PAGENO); # Achtung - PAGENO scheint meist "None" zu sein
+##TMP $title->set_text($LABEL . ", Text #" . $textcounter); # at least for Suomen Kuvalehti
+##TMP $resp ->set_text("compiled by EuReCo");
+##TMP $name ->set_text("EuReCo: HL");
+##TMP
+##TMP #--------------------------
+##TMP # fileDesc/publicationStmt
+##TMP #--------------------------
+##TMP my $publicationStmt = $fileDesc ->insert_new_elt("last_child", 'publicationStmt');
+##TMP my $distributor = $publicationStmt->insert_new_elt("last_child", 'distributor');
+##TMP my $note = $distributor ->insert_new_elt("last_child", 'note');
+##TMP my $availability = $publicationStmt->insert_new_elt("last_child", 'availability');
+##TMP my $licence = $availability ->insert_new_elt("last_child", 'licence');
+##TMP
+##TMP # set texts for publicationStmt
+##TMP $note ->set_text("NOT FOR DISTRIBUTION - to be used locally in EuReCo");
+##TMP $licence->set_text("CLARIN_RES"); # TODO: Ausfuherlichere Licence info in KLK Metadata Record
+##TMP
+##TMP #------------------------------
+##TMP # fileDesc/sourceDesc/biblStruct
+##TMP #------------------------------
+##TMP my $sourceDesc = $fileDesc ->insert_new_elt("last_child", 'sourceDesc');
+##TMP my $biblStruct = $sourceDesc->insert_new_elt("last_child", 'biblStruct');
+##TMP
+##TMP # fileDesc/sourceDesc/biblStruct/analytic
+##TMP my $analytic = $biblStruct->insert_new_elt("last_child", 'analytic');
+##TMP my $analytic_title = $analytic->insert_new_elt("last_child", 'title' => {type => "main"} );
+##TMP # my $analytic_date = $analytic->insert_new_elt("last_child", 'date');
+##TMP my $analytic_date_year = $analytic->insert_new_elt("last_child", 'date' => {type => "year"});
+##TMP my $analytic_date_month = $analytic->insert_new_elt("last_child", 'date' => {type => "month"});
+##TMP my $analytic_date_day = $analytic->insert_new_elt("last_child", 'date' => {type => "day"});
+##TMP my $analytic_idno_pageid = $analytic->insert_new_elt("last_child", 'idno' => {type => "PAGEID"});
+##TMP my $analytic_idno_bindingid = $analytic->insert_new_elt("last_child", 'idno' => {type => "BINDINGID"});
+##TMP my $analytic_idno_id = $analytic->insert_new_elt("last_child", 'idno' => {type => "ID"});
+##TMP my $analytic_idno_metafile = $analytic->insert_new_elt("last_child", 'idno' => {type => "KIELIPANKKI_METAFILENAME"});
+##TMP my $analytic_idno_origfile = $analytic->insert_new_elt("last_child", 'idno' => {type => "KIELIPANKKI_ORIGFILENAME"});
+##TMP my $analytic_textlang = $analytic->insert_new_elt("last_child", 'textLang');
+##TMP
+##TMP # set texts for analytic
+##TMP # $analytic_title ->set_text($LABEL . ", page " . $PAGENO); # Achtung $PAGENO scheint meist "None zu sein"
+##TMP $analytic_title ->set_text($LABEL . ", Text #" . $textcounter); # Achtung $PAGENO scheint meist "None zu sein"
+##TMP # $analytic_date ->set_text($DATE);
+##TMP $analytic_date_year ->set_text($datearray[0]);
+##TMP $analytic_date_month ->set_text($datearray[1]);
+##TMP $analytic_date_day ->set_text($datearray[2]);
+##TMP $analytic_idno_pageid ->set_text($PAGEID);
+##TMP $analytic_idno_bindingid->set_text($BID);
+##TMP $analytic_idno_id ->set_text($ID);
+##TMP $analytic_idno_metafile ->set_text($METAFILENAME);
+##TMP $analytic_idno_origfile ->set_text($ORIGFILENAME);
+##TMP $analytic_textlang ->set_text($LANGUAGE);
+##TMP
+##TMP #-------------------------------------
+##TMP # fileDesc/sourceDesc/biblStruct/monogr
+##TMP #-------------------------------------
+##TMP my $monogr = $biblStruct->insert_new_elt("last_child", 'monogr');
+##TMP my $monogr_title = $monogr ->insert_new_elt("last_child", 'title');
+##TMP my $imprint = $monogr ->insert_new_elt("last_child", 'imprint'); # imprint is needed for valididty
+##TMP my $pubPlace = $imprint ->insert_new_elt("last_child", 'pubPlace'); # imprint is needed for validity
+##TMP my $publisher = $imprint ->insert_new_elt("last_child", 'publisher'); # imprint is needed for validity
+##TMP my $biblScope_issuetitle = $monogr ->insert_new_elt("last_child", 'biblScope' => {unit => 'ISSUETITLE'} );
+##TMP my $biblScope_issueno = $monogr ->insert_new_elt("last_child", 'biblScope' => {unit => 'ISSUENO'} );
+##TMP my $biblScope_issuedate = $monogr ->insert_new_elt("last_child", 'biblScope' => {unit => 'ISSUEDATE'} );
+##TMP my $biblScope_pp = $monogr ->insert_new_elt("last_child", 'biblScope' => {unit => 'PAGENO'} ); # Achtung PAGENO ist meist "None" ?
+##TMP
+##TMP # set texts for monogr
+##TMP $monogr_title ->set_text($PUBLTITLE);
+##TMP $pubPlace ->set_text("TODO");
+##TMP $pubPlace ->set_att("key",'FI');
+##TMP $publisher ->set_text("TODO");
+##TMP $biblScope_issuetitle->set_text($ISSUETITLE);
+##TMP $biblScope_issueno ->set_text($ISSUENO);
+##TMP $biblScope_issuedate ->set_text($ISSUEDATE);
+##TMP $biblScope_pp ->set_text($PAGENO);
+##TMP
+##TMP #---------------
+##TMP # encodingDesc
+##TMP #---------------
+##TMP my $tagsDecl = $encodingDesc->insert_new_elt("last_child", 'tagsDecl');
+##TMP my $namespace = $tagsDecl ->insert_new_elt("last_child", 'namespace' => {name => 'http://www.tei-c.org/ns/1.0'});
+##TMP my $tagUsage_s = $namespace ->insert_new_elt("last_child", 'tagUsage' => {gi => 's', occurs => $SENTCOUNT});
+##TMP my $tagUsage_w = $namespace ->insert_new_elt("last_child", 'tagUsage' => {gi => 'w', occurs => $TOKENCOUNT});
+##TMP
+##TMP #-------------
+##TMP # profileDesc
+##TMP #-------------
+##TMP my $langUsage = $profileDesc ->insert_new_elt("last_child", 'langUsage');
+##TMP my $language = $langUsage ->insert_new_elt("last_child", 'language' => {ident => $LANGUAGE, usage => $SUMLANG});
+##TMP # Achtung in @usage muss eigt. ein integer; am besten inhalt von SUMLANG aufdroeseln und mehrere <language> machen
+##TMP my $textClass = $profileDesc ->insert_new_elt("last_child", 'textClass');
+##TMP my $classCode_fi = $textClass ->insert_new_elt("last_child", 'classCode' => {scheme => "KLK_PUBLTYPE"});
+##TMP # my $classCode_en = $textClass ->insert_new_elt("last_child", 'classCode' => {scheme => "KLK_PUBLTYPE_MAPPED"});
+##TMP
+##TMP #---------------------------
+##TMP # set texts for profileDesc
+##TMP #---------------------------
+##TMP $classCode_fi ->set_text($PUBLTYPE);
+##TMP # $classCode_en->set_text($PUBLTYPETRANSL);
+##TMP
+##TMP #---------------
+##TMP # revisionDesc
+##TMP #---------------
+##TMP my $change = $revisionDesc ->insert_new_elt("last_child", 'change' => {when => localtime->ymd('-'), who => 'HL' });
+##TMP
+##TMP # set texts for revisionDesc
+##TMP $change->set_text("TEI version for EuReCo");
- #---------------------------
- # set texts for profileDesc
- #---------------------------
- $classCode_fi ->set_text($PUBLTYPE);
-# $classCode_en->set_text($PUBLTYPETRANSL);
- #---------------
- # revisionDesc
- #---------------
- my $change = $revisionDesc ->insert_new_elt("last_child", 'change' => {when => localtime->ymd('-'), who => 'HL' });
- # set texts for revisionDesc
- $change->set_text("TEI version for EuReCo");
+
+
+
###################################
# END OF CREATING TEIHEADER
###################################
@@ -549,6 +666,53 @@
}
+
+sub set_title{
+ # Sets the corpus title in the header skeleton: the element
+ # fileDesc/titleStmt/title below $corpusHeader receives the text
+ # "$source $yy, from $kielipankkiCorpus for EuReCo".
+ # Dies with a readable message if the skeleton has no such <title>.
+ my ($corpusHeader, $source, $yy, $kielipankkiCorpus) = @_;
+
+ my $cTitleString = $source . " " . $yy . ", from ". $kielipankkiCorpus . " for EuReCo";
+
+ #<teiHeader>
+ # <fileDesc>
+ # <titleStmt>
+ # <title>[Aamulehti2021] from [klk-fi-v2-vrt for] EuReCo</title>
+
+ # look the node up in one step, so that a missing element can be reported instead of calling a method on undef
+ my $cTitleNode = $corpusHeader->get_xpath("./fileDesc/titleStmt/title", 0);
+ defined($cTitleNode) or die("$0: corpus header skeleton has no fileDesc/titleStmt/title\n");
+ $cTitleNode->set_text($cTitleString);
+}
+
+sub set_sourceDesc{
+ # Sets the source description in the header skeleton: the element
+ # fileDesc/sourceDesc/bibl below $corpusHeader receives the text
+ # "$source $yy, from $kielipankkiCorpus for EuReCo".
+ # Dies with a readable message if the skeleton has no such <bibl>.
+ my ($corpusHeader, $source, $yy, $kielipankkiCorpus) = @_;
+
+ # NOTE(review): the template below shows no " for EuReCo" suffix in <bibl> - confirm which one is intended
+ my $cBiblString = $source . " " . $yy . ", from ". $kielipankkiCorpus . " for EuReCo";
+
+ #<teiHeader>
+ # <fileDesc>
+ # <sourceDesc>
+ # <bibl>[Aamulehti2021] from [klk-fi-v2-vrt]</bibl>
+
+ my $cBiblNode = $corpusHeader->get_xpath("./fileDesc/sourceDesc/bibl", 0); # one lookup, so a missing element can be reported
+ defined($cBiblNode) or die("$0: corpus header skeleton has no fileDesc/sourceDesc/bibl\n");
+ $cBiblNode->set_text($cBiblString);
+}
+
+
+
+
+
+
+
#################
## usage_message
#################