textcounter as disambiguator in text title; corpus header profileDesc; pubPlace/@key
diff --git a/vrt2tei.pl b/vrt2tei.pl
index c7789a8..dcb3cd1 100755
--- a/vrt2tei.pl
+++ b/vrt2tei.pl
@@ -11,7 +11,7 @@
# using XML::Twig , see http://www.xmltwig.org/, https://metacpan.org/pod/XML::Twiga
#
# usage: see below the usage function
-# Usage: ./xml2i5.pl <vrtxmlfile.xml> <outfile>
+# Usage: ./vrt2tei.pl <vrtxmlfile.xml> <outfile>
# <vrtxmlfile>: xml-ised vrt file
#
#
@@ -83,7 +83,7 @@
my $encoding = "UTF-8";
#my $encoding = "iso-8859-1"; # dieses $encoding ist NUR fuer das output s.u. twig funktion
-
+my $textcounter = 0;
#####################
@@ -158,7 +158,8 @@
my $teiHeader = $root ->insert_new_elt("first_child", 'teiHeader');
my $fileDesc = $teiHeader ->insert_new_elt("last_child", 'fileDesc');
-
+ my $profileDesc = $teiHeader ->insert_new_elt("last_child", 'profileDesc');
+
my $titleStmt = $fileDesc ->insert_new_elt("last_child", 'titleStmt');
my $title = $titleStmt ->insert_new_elt("last_child", 'title');
$title ->set_text(" KLK-fi-2021 for EuReCo");
@@ -171,6 +172,10 @@
my $bibl = $sourceDesc ->insert_new_elt("last_child", 'bibl');
$bibl ->set_text("ToDo");
+ my $langUsage = $profileDesc ->insert_new_elt("last_child", 'langUsage');
+ my $language = $langUsage ->insert_new_elt("last_child", 'language');
+ $language ->set_att("ident",'fi');
+ $language ->set_text("Finnish");
}
@@ -179,10 +184,11 @@
#----------------------------
sub text {
-
my ($twig, $text) = @_;
- # ToDo: catch all other, unexpected children
+ $textcounter++; # global variable
+
+ # ToDo: catch all other, unexpected children of root
#--------------------------------------------------------------------------
# Get text metadata (attributes of <text>) and create teiHeader for <text>
@@ -199,7 +205,7 @@
# set vrt <text> to <TEI> and delete all attributes after they were were saved above
$text->del_atts;
$text->set_gi("TEI");
-
+
#------------------------------------------------------------------
# create the <tei:text>, <body>, <div> elements inside <TEI>
#------------------------------------------------------------------
@@ -209,7 +215,8 @@
my $div_element = XML::Twig::Elt->new('div');
# set atts
- $div_element->set_att("type", "page"); # ToDo: this is specific to KLK
+ $div_element ->set_att("type", "page"); # ToDo: this is specific to KLK
+ $ttext_element->set_att("xml:lang", 'fi'); # as in ICC-NOR
# paste
$ttext_element->paste('last_child', $text);
@@ -253,12 +260,12 @@
&createW($w_element, $line);
$w_element->paste('last_child', $sentence);
}
- }
- }
- }
+ } # end words
+ } # end sentences
+ } # end paragraphs
- $twig->flush($OUT);
-}
+ $twig->flush($OUT);
+} # end texts
sub createTextHeader{
@@ -348,7 +355,8 @@
my $name = $respStmt ->insert_new_elt("last_child", 'name');
# set texts for titleStmt
- $title->set_text($LABEL . ", page " . $PAGENO); # Achtung - PAGENO scheint meist "None" zu sein
+ # $title->set_text($LABEL . ", page " . $PAGENO); # Caution: PAGENO seems to be "None" most of the time
+ $title->set_text($LABEL . ", Text #" . $textcounter); # at least for Suomen Kuvalehti
$resp ->set_text("compiled by EuReCo");
$name ->set_text("EuReCo: HL");
@@ -386,7 +394,8 @@
my $analytic_textlang = $analytic->insert_new_elt("last_child", 'textLang');
# set texts for analytic
- $analytic_title ->set_text($LABEL . ", page " . $PAGENO); # Achtung $PAGENO scheint meist "None zu sein"
+# $analytic_title ->set_text($LABEL . ", page " . $PAGENO); # Caution: $PAGENO seems to be "None" most of the time
+ $analytic_title ->set_text($LABEL . ", Text #" . $textcounter); # text counter as disambiguator, because $PAGENO is mostly "None"
# $analytic_date ->set_text($DATE);
$analytic_date_year ->set_text($datearray[0]);
$analytic_date_month ->set_text($datearray[1]);
@@ -414,6 +423,7 @@
# set texts for monogr
$monogr_title ->set_text($PUBLTITLE);
$pubPlace ->set_text("TODO");
+ $pubPlace ->set_att("key",'FI');
$publisher ->set_text("TODO");
$biblScope_issuetitle->set_text($ISSUETITLE);
$biblScope_issueno ->set_text($ISSUENO);