#!/bin/bash

#######################################
# Print the usage instructions for compareVC.sh.
# Describes the two expected arguments and the line format each input file
# must have.
# Arguments: none
# Outputs:   writes eight lines of help text to stdout
#######################################
printHelp() {
  # A single printf emits one line per argument; the empty arguments
  # produce the blank separator lines.
  printf '%s\n' \
    "Please use the following command:" \
    " ./compareVC.sh [Cosmas2 VC-file] [VC-file]" \
    "" \
    "The Cosmas2 virtual corpus should have the following format: " \
    " <text>DOL00/JAN.00504</text>" \
    "" \
    "The other VC should contain a simple list of text Sigle, i.e. one text sigle per line. In the following format:" \
    " DOL00/APR/00055"
}

# ---------------------------------------------------------------------------
# Main script.
#   $1 - virtual corpus file whose lines are wrapped in <text>...</text>
#   $2 - virtual corpus file with one plain text sigle per line
# Writes normalised, sorted copies to ./vc1 and ./vc2, prints each input
# name followed by the line count of its copy, then opens both in meld.
# ---------------------------------------------------------------------------
vc1=${1:-}
vc2=${2:-}

# Both arguments are mandatory.
if [[ -z "$vc1" || -z "$vc2" ]]; then
  printHelp
  exit 1
fi

for input in "$vc1" "$vc2"; do
  if [[ ! -r "$input" || -d "$input" ]]; then
    printf 'Error: cannot read file: %s\n' "$input" >&2
    exit 1
  fi
  # The results are written to ./vc1 and ./vc2; redirecting into a file
  # that is also an input would truncate it before it is read.
  if [[ "$input" -ef vc1 || "$input" -ef vc2 ]]; then
    printf 'Error: %s would be overwritten by the output files vc1/vc2\n' \
      "$input" >&2
    exit 1
  fi
done

# The first file must start with a <text> element.
firstLine=$(head -n 1 -- "$vc1")
textTagPattern='^<text>'

if ! [[ $firstLine =~ $textTagPattern ]]; then
  printHelp
  exit 1
fi

# Strip the <text>/</text> tags, then turn the FIRST dot of each line into a
# slash (e.g. DOL00/JAN.00504 -> DOL00/JAN/00504) so both lists share one
# notation before sorting.
sed -E -e 's#</?text>##g' -e 's#\.#/#' -- "$vc1" | sort > vc1

sort -- "$vc2" > vc2

printf '%s\n' "$1"
wc -l vc1
printf '%s\n' "$2"
wc -l vc2

if ! command -v meld > /dev/null; then
  printf 'Error: meld is not installed; compare the files vc1 and vc2 manually\n' >&2
  exit 1
fi

meld vc1 vc2