#!/bin/bash

#######################################
# Print the usage text for this script.
# Describes the command line and the expected format of both input files:
# the Cosmas2 virtual corpus (sigles wrapped in <text>...</text>) and the
# plain list with one text sigle per line.
# Arguments: none
# Outputs:   writes the help text (8 lines) to stdout
#######################################
printHelp() {
  # One printf call emits every line; '%s\n' is reused for each argument,
  # and the empty arguments produce the blank separator lines.
  printf '%s\n' \
    "Please use the following command:" \
    " ./compareVC.sh [Cosmas2 VC-file] [VC-file]" \
    "" \
    "The Cosmas2 virtual corpus should have the following format: " \
    " <text>DOL00/JAN.00504</text>" \
    "" \
    "The other VC should contain a simple list of text Sigle, i.e. one text sigle per line. In the following format:" \
    " DOL00/APR/00055"
}

# Compare a Cosmas2 virtual corpus (VC) with a plain list of text sigles.
#
# Arguments:
#   $1 - Cosmas2 VC file; its first line must start with "<text>"
#   $2 - VC file containing one text sigle per line
# Side effects:
#   Writes the normalized, sorted sigle lists to ./vc1 and ./vc2
#   (overwriting existing files of that name), prints each input name
#   followed by the line count of its sorted list, then opens both sorted
#   lists in meld.
# Exit status:
#   1 on missing arguments, unreadable input, an input that would be
#   overwritten, a wrong Cosmas2 format or a failed conversion; otherwise
#   the exit status of meld.

# Make a failing sed in the "sed | sort" pipeline below visible.
set -o pipefail

vc1=${1-}
vc2=${2-}

# Both arguments are required. Quoting keeps paths with spaces intact.
if [[ -z "$vc1" || -z "$vc2" ]]; then
  printHelp >&2
  exit 1
fi

for input in "$vc1" "$vc2"; do
  if [[ ! -r "$input" ]]; then
    printf 'Cannot read input file: %s\n' "$input" >&2
    exit 1
  fi
  # The results go to ./vc1 and ./vc2. Redirecting into a file that is also
  # an input would truncate it before it is read, so refuse that case.
  if [[ "$input" -ef vc1 || "$input" -ef vc2 ]]; then
    printf 'Input file %s would be overwritten by the output files vc1/vc2.\n' \
      "$input" >&2
    exit 1
  fi
done

# The Cosmas2 export is recognized by its first line starting with "<text>".
first_line=$(head -n 1 < "$vc1")

if ! [[ $first_line =~ ^\<text\> ]]; then
  printHelp >&2
  exit 1
fi

# Strip the <text>/</text> tags and turn the first "." of every line into
# "/", so that "DOL00/JAN.00504" becomes "DOL00/JAN/00504".
sed -E -e 's/<\/?text>//g' -e 's/\./\//' < "$vc1" | sort > vc1 || exit 1

sort < "$vc2" > vc2 || exit 1

printf '%s\n' "$vc1"
wc -l vc1
printf '%s\n' "$vc2"
wc -l vc2

meld vc1 vc2