Add -metadata-categories <n> option to support metadata input

(The leading tab-separated metadata columns are skipped while reading; the metadata itself is ignored for now.)

Change-Id: Iea84524400018eb2de842f6b4bdbed1808bc868b
diff --git a/src/dereko2vec.c b/src/dereko2vec.c
index 8c34179..bff3c4c 100644
--- a/src/dereko2vec.c
+++ b/src/dereko2vec.c
@@ -45,7 +45,7 @@
 
 struct vocab_word *vocab;
 int binary = 0, type = 1, debug_mode = 2, window = 5, min_count = 5,
-	num_threads = 12, min_reduce = 1;
+	num_threads = 12, min_reduce = 1, metadata_categories = 0, expected_metadata_categories = 0;
 int *vocab_hash;
 long long *threadPos;
 int *threadIters;
@@ -146,13 +146,21 @@
 		if (ch == 13)
 			continue;
 		if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
-			if (a > 0) {
-				if (ch == '\n')
-					ungetc(ch, fin);
-				break;
-			}
+			if (ch == '\t' && expected_metadata_categories > 0) {
+        a = 0;
+        expected_metadata_categories--;
+      } else {
+        if (a > 0) {
+          if (ch == '\n') {
+            expected_metadata_categories = metadata_categories;
+            ungetc(ch, fin);
+          }
+          break;
+        }
+      }
 			if (ch == '\n') {
 				strcpy(word, (char *) "</s>");
+        expected_metadata_categories = metadata_categories;
 				return;
 			} else
 				continue;
@@ -369,6 +377,9 @@
 	}
 	vocab_size = 0;
 	AddWordToVocab((char *) "</s>");
+  for (int j=0; j < metadata_categories; j++) {
+    ReadWord(word, fin);
+  }
 	while (1) {
 		ReadWord(word, fin);
 		if (feof(fin))
@@ -2061,6 +2072,8 @@
 		min_count = atoi(argv[i + 1]);
 	if ((i = ArgPos((char *) "-classes", argc, argv)) > 0)
 		classes = atoi(argv[i + 1]);
+  if ((i = ArgPos((char *) "-metadata-categories", argc, argv)) > 0)
+    metadata_categories = atoi(argv[i + 1]);
 	if ((i = ArgPos((char *) "-cap", argc, argv)) > 0)
 		cap = atoi(argv[i + 1]);
 	if (type == 0 || type == 2 || type == 4)