Added a script to compare two lists of text sigles from KorAP and C2.

Change-Id: I705fb4fb8f27f06a822a4c728f579de1be9d3355
diff --git a/tools/compareVC.sh b/tools/compareVC.sh
new file mode 100755
index 0000000..00b37f9
--- /dev/null
+++ b/tools/compareVC.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+printHelp(){
+  # Print the usage line and the expected layout of both input files to stdout.
+  printf '%s\n' \
+    "Please use the following command:" \
+    "  ./compareVC.sh [Cosmas2 VC-file] [VC-file]" "" \
+    "The Cosmas2 virtual corpus should have the following format: " \
+    "  <text>DOL00/JAN.00504</text>" "" \
+    "The other VC should contain a simple list of text Sigle, i.e. one text sigle per line. In the following format:" \
+    "  DOL00/APR/00055"
+}
+
+vc1=$1
+vc2=$2
+
+if [ -z $1 ]||[ -z $2 ];
+then
+    printHelp
+    exit
+fi
+
+firstLine="$(head -n 1 $vc1)"
+
+if ! [[ $firstLine =~ ^\<text\> ]];
+then
+  printHelp
+  exit
+fi
+
+cat $vc1 | sed -E 's/<\/?text>//g' - |  sed 's/\./\//' -| sort > vc1
+
+cat $vc2 | sort > vc2
+
+echo $1
+wc -l vc1
+echo $2
+wc -l vc2
+
+meld vc1 vc2