#!/bin/bash
#
# Convert the yearly KLK-fi VRT corpus files into per-source XML and TEI files.
#
# For every yearly VRT file, each source directory below $XML is handled in its
# own background job: the source's XML file for that year is checked for
# well-formedness, converted to TEI with ./vrt2tei.pl, validated against the
# TEI DTD and pretty-printed. The XML file and the final TEI file are gzipped.
#
# Calls xmllint, gzip and ./vrt2tei.pl (resolved relative to the current
# working directory, so run the script from the directory containing it).

EURECO="/scratch/project_2010889/eureco"

CORPUS="$EURECO/klk-fi-v2-vrt"
TEIDTD="$EURECO/tei/tei_all.dtd"

DAT="$CORPUS/data"   # input vrt files of klk-fi by year
XML="$CORPUS/XML"    # output zipped vrtxml files of selection of papers
TEI="$CORPUS/TEI"    # output zipped tei files of the selection

mkdir -p -- "$XML" "$TEI"

# Running the iterations of a loop in parallel in bash (background subshells,
# not real threads):
#
## for stuff in things
## do
## ( something
##   with
##   stuff ) &
## done
## wait # for all the something with stuff

#######################################
# Check, convert, validate and compress the XML file of one source and year.
# Globals:   TEI, TEIDTD (read)
# Arguments: $1 - source directory below $XML (a trailing slash is accepted)
#            $2 - year, as taken from the name of the yearly VRT file
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 on success or when the source has no file for that year,
#            1 when the XML is not well-formed or the TEI conversion fails
#######################################
process_source() {
  local dir=${1%/}          # drop the trailing slash left by the */ glob
  local year=$2
  local src=${dir##*/}      # directory name == source name
  local xml="$dir/$src$year.xml"
  local base="$src$year"
  local tei_raw="$TEI/$src/$base.tei.0.xml"
  local tei="$TEI/$src/$base.tei.xml"

  # Not every source necessarily has a file for every year.
  if [[ ! -f "$xml" ]]; then
    echo "  skipping $src: $xml not found" >&2
    return 0
  fi

  echo "  checking wellformedness of $xml"
  if ! xmllint --noout "$xml"; then
    echo "  ERROR: $xml is not well-formed, skipping" >&2
    return 1
  fi

  mkdir -p -- "$TEI/$src"

  echo "  generating $tei_raw..."
  # ToDo: pipe in and out
  if ! ./vrt2tei.pl "$xml" "$tei_raw"; then
    # Leave the XML uncompressed so the conversion can simply be re-run.
    echo "  ERROR: conversion of $xml failed, skipping" >&2
    return 1
  fi

  echo "  zipping $xml..."
  gzip -- "$xml"

  echo "  validating and prettifying $tei_raw..."
  # The formatted file is kept even if xmllint complains; only report it.
  xmllint --format --dtdvalid "$TEIDTD" "$tei_raw" > "$tei" \
    || echo "  WARNING: xmllint reported errors for $tei_raw" >&2
  ls -l "$tei"

  echo "  removing $tei_raw..."
  rm -- "$tei_raw"
  echo "  zipping $tei..."
  gzip -- "$tei"

  echo
}

# Generate corpus files by year and source in vrt

# for VRT in "$DAT"/*.vrt
for VRT in "$DAT/klk_fi_v2_2021.vrt"; do   # to be applied to yearly files
  BASENAME=$(basename -- "$VRT" .vrt)
  # The year is the 4th "_"-separated field of the name (klk_fi_v2_YYYY).
  IFS=_ read -r _ _ _ YY _ <<<"$BASENAME"

  echo "Generating proper XML files from $VRT in $XML/$YY by source..."
  # will generate corpus files for different sources in the YEAR dir:
  #gawk -v OUTDIR="$XML" -v YEAR="$YY" -f vrt2xml.awk "$VRT"

  echo "Checking Wellformedness and generating TEI..."
  for s in "$XML"/*/; do
  # for s in "$XML"/Aamulehti/; do
    [[ -d "$s" ]] || continue   # glob matched nothing
    process_source "$s" "$YY" &  # one background job per source
  done
  wait   # for all sources of this year before starting the next one
done