changed validating and checking wellformedness
diff --git a/klk2eureco.sh b/klk2eureco.sh
index 9864644..47a67b6 100755
--- a/klk2eureco.sh
+++ b/klk2eureco.sh
@@ -39,40 +39,46 @@
echo "Generating proper XML files from $VRT in $XML/$YY by source..."
#gawk -v OUTDIR="$XML" -v YEAR="$YY" -f vrt2xml.awk $VRT # will generate corpus files for different sources in the YEAR dir
- echo "Checking Wellformedness and generating TEI..."
- for s in $XML/*/
- # for s in $XML/Aamulehti/
- do ( # threading
-
+ # Checking Wellformedness of the XML and generating TEI
+ # for s in $XML/*/
+ # for s in $XML/Suomen_Kuvalehti/
+ for s in $XML/Aamulehti/ $XML/Etela-Suomen_Sanomat/ $XML/Hameen_Sanomat/ $XML/Helsingin_Sanomat/ $XML/Ilkka-Pohjalainen/ $XML/Ilta-Sanomat/ $XML/Kaleva/ $XML/Keskipohjanmaa/ $XML/Satakunnan_Kansa/ $XML/Savon_Sanomat/ $XML/Turun_Sanomat/
+ do # ( # threading
SOURCE=`basename $s`
x=$s/$SOURCE$YY.xml
- echo " checking wellformedness of $x"
- xmllint --noout $x # ToDo: make if condition for continuing only if well-formed
-
+ ## echo " checking wellformedness of $x"
+ ## xmllint --noout $x # ToDo: make if condition for continuing only if well-formed
+ # if($R != 0){
+ # echo "Error: xmllint error with error return code $R" >&2; # to stderr
+ # break;
+ # }
+
BASENAME=`basename $x .xml`
mkdir -p $TEI/$SOURCE
- u="$TEI/$SOURCE/$BASENAME.tei.0.xml"
t="$TEI/$SOURCE/$BASENAME.tei.xml"
+ t0="$TEI/$SOURCE/$BASENAME.tei.0.xml"
- echo " generating $u..."
- ./vrt2tei.pl $x $u # ToDo: pipe in and out
+ echo " generating $t0 using vrt2tei.pl"
+ ./vrt2tei.pl $x > $t0
- echo " zipping $x..."
- gzip $x
-
- echo " validating and prettifying $u..."
- xmllint --format --dtdvalid $TEIDTD $u > $t
+ echo " prettifying..."
+ #xmllint --format $t0 > $t # runs out of memory, and using --stream does not work
+ xml_pp $t0 > $t # works but takes ages
ls -l $t
+
+ echo " validating..."
+ xmllint --stream --noout --dtdvalid $TEIDTD $t # seems to work this way - do not combine with --format!
- echo " removing $u..."
- rm $u;
- echo " zipping $t..."
- gzip $t
+ # echo " zipping $x..."
+ # gzip -f $x
- echo ) &
+ # echo " zipping $t..."
+ # gzip -f $t
+
+ echo # ) &
done
- wait # ) &
+ # wait # ) & # wait does not seem to make sense if nothing follows
done
# wait