Save metadata in standard dictionary
Change-Id: I4cdd46783094e98e9a9a516a1bcb66faa2d4773d
diff --git a/src/dereko2vec.c b/src/dereko2vec.c
index bff3c4c..3671bd6 100644
--- a/src/dereko2vec.c
+++ b/src/dereko2vec.c
@@ -27,7 +27,9 @@
#define MAX_SENTENCE_LENGTH 1000
#define MAX_CC 100
#define MAX_CODE_LENGTH 40
+#define MAX_METADATA_CATEGORIES 4
+#define METADATA_MARKER ' '
const int vocab_hash_size = 30000000; // Maximum 30 * 0.7 = 21M words in the vocabulary
typedef float real; // Precision of float numbers
@@ -147,8 +149,14 @@
continue;
if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
if (ch == '\t' && expected_metadata_categories > 0) {
- a = 0;
- expected_metadata_categories--;
+ word[a] = 0;
+ a = 0;
+ expected_metadata_categories--;
+ if (debug_mode > 2)
+ printf("Metadata: %s\n", word);
+ strcpy(word + 1, word);
+ *word = METADATA_MARKER;
+ return;
} else {
if (a > 0) {
if (ch == '\n') {
@@ -2073,7 +2081,16 @@
if ((i = ArgPos((char *) "-classes", argc, argv)) > 0)
classes = atoi(argv[i + 1]);
if ((i = ArgPos((char *) "-metadata-categories", argc, argv)) > 0)
- metadata_categories = atoi(argv[i + 1]);
+ metadata_categories = atoi(argv[i + 1]);
+ if ((i = ArgPos((char *) "-metadata-categories", argc, argv)) > 0) {
+ metadata_categories = atoi(argv[i + 1]);
+ if (metadata_categories > MAX_METADATA_CATEGORIES) {
+ printf("ERROR: metadata categories must be <= %d\n", MAX_METADATA_CATEGORIES);
+ exit(1);
+ }
+ for (int j = 0; j <= metadata_categories; j++) {
+ }
+ }
if ((i = ArgPos((char *) "-cap", argc, argv)) > 0)
cap = atoi(argv[i + 1]);
if (type == 0 || type == 2 || type == 4)