blob: 00b37f9c002a9160998c0f44be680b067dde465d [file] [log] [blame]
margaretha9ddfb1a2021-10-08 11:19:43 +02001#!/bin/bash
2
#######################################
# Print the usage instructions for compareVC.sh, including an example of
# the expected line format of each of the two virtual corpus (VC) files.
# Arguments: none
# Outputs:   eight lines of help text on stdout
#######################################
printHelp() {
  # The '%s\n' format is reused for every argument: one argument per line.
  printf '%s\n' \
    'Please use the following command:' \
    ' ./compareVC.sh [Cosmas2 VC-file] [VC-file]' \
    '' \
    'The Cosmas2 virtual corpus should have the following format: ' \
    ' <text>DOL00/JAN.00504</text>' \
    '' \
    'The other VC should contain a simple list of text Sigle, i.e. one text sigle per line. In the following format:' \
    ' DOL00/APR/00055'
}
13
# ---------------------------------------------------------------------------
# Main script: normalise both virtual corpus (VC) files into sorted lists of
# text sigles, print their line counts and open the two lists in meld.
#
# Arguments: $1 - Cosmas2 VC file; its first line must start with <text>
#            $2 - plain VC file, one text sigle per line
# Outputs:   ./vc1 and ./vc2 (sorted lists) in the current directory;
#            each input name followed by its line count on stdout;
#            diagnostics on stderr
# Exit code: 1 on usage, input or processing errors; otherwise meld's status
# ---------------------------------------------------------------------------
vc1=${1:-}
vc2=${2:-}

# Both arguments are mandatory.
if [[ -z "$vc1" || -z "$vc2" ]]; then
  printHelp
  exit 1
fi

# Fail early with a clear message instead of a cascade of tool errors.
for input in "$vc1" "$vc2"; do
  if [[ ! -r "$input" ]]; then
    printf 'Cannot read file: %s\n' "$input" >&2
    exit 1
  fi
done

firstLine=$(head -n 1 -- "$vc1")

# The Cosmas2 file is recognised by a leading <text> tag on its first line.
if [[ "$firstLine" != '<text>'* ]]; then
  printHelp
  exit 1
fi

# Strip the <text>/</text> tags, then turn the first '.' of each line into
# '/' so that DOL00/JAN.00504 becomes DOL00/JAN/00504.
# 'sort -o' is used instead of '> vc1' because sort reads all of its input
# before opening the output file; a shell redirection would truncate the
# input first if it happens to be ./vc1 itself.
sed -E -e 's/<\/?text>//g' -e 's/\./\//' -- "$vc1" | sort -o vc1 || exit 1

# Same reasoning: safe even when the second argument is ./vc2.
sort -o vc2 -- "$vc2" || exit 1

printf '%s\n' "$1"
wc -l vc1
printf '%s\n' "$2"
wc -l vc2

# The sorted lists are kept on disk, so they can still be compared manually.
if ! command -v meld >/dev/null 2>&1; then
  printf 'meld is not installed; compare ./vc1 and ./vc2 manually.\n' >&2
  exit 1
fi

meld vc1 vc2