blob: 00b37f9c002a9160998c0f44be680b067dde465d [file] [log] [blame]
#!/bin/bash
printHelp(){
echo "Please use the following command:"
echo " ./compareVC.sh [Cosmas2 VC-file] [VC-file]"
echo ""
echo "The Cosmas2 virtual corpus should have the following format: "
echo " <text>DOL00/JAN.00504</text>"
echo ""
echo "The other VC should contain a simple list of text Sigle, i.e. one text sigle per line. In the following format:"
echo " DOL00/APR/00055"
}
vc1=$1
vc2=$2
if [ -z $1 ]||[ -z $2 ];
then
printHelp
exit
fi
firstLine="$(head -n 1 $vc1)"
if ! [[ $firstLine =~ ^\<text\> ]];
then
printHelp
exit
fi
cat $vc1 | sed -E 's/<\/?text>//g' - | sed 's/\./\//' -| sort > vc1
cat $vc2 | sort > vc2
echo $1
wc -l vc1
echo $2
wc -l vc2
meld vc1 vc2