Extend C API: return Collocator structs and expose read_vocab
Change-Id: I326b2074645f43f81b68cd39eacfe7e247138b41
diff --git a/README.md b/README.md
index 31b59f2..f89ceb6 100644
--- a/README.md
+++ b/README.md
@@ -30,13 +30,36 @@
```
## Provided API
```
-COLLOCATORDB *open_collocatordb(const char *path_to_rocksdb_without_extension);
-COLLOCATORDB *open_collocatordb_for_write(const char *path_to_rocksdb_without_extension);
+typedef struct {
+ uint32_t w2;
+ uint64_t f2;
+ uint64_t raw;
+ double pmi;
+ double npmi;
+ double llr;
+ double lfmd;
+ double md;
+ uint64_t left_raw;
+ uint64_t right_raw;
+ double left_pmi;
+ double right_pmi;
+ double dice;
+ double logdice;
+ double ldaf;
+ int window;
+ int af_window;
+} Collocator ;
+
+COLLOCATORDB *open_collocatordb(const char *s);
+COLLOCATORDB *open_collocatordb_for_write(const char *s);
void inc_collocator(COLLOCATORDB *db, uint64_t w1, uint64_t w2, int8_t dist);
void dump_collocators(COLLOCATORDB *db, uint32_t w1, uint32_t w2, int8_t dist);
+Collocator *get_collocators(COLLOCATORDB *db, uint32_t w1);
+Collocator *get_collocation_scores(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
char *get_collocators_as_json(COLLOCATORDB *db, uint32_t w1);
char *get_collocation_scores_as_json(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
char *get_word(COLLOCATORDB *db, uint32_t w1);
+void read_vocab(COLLOCATORDB *db, char *fname);
```
## TODO
diff --git a/src/collocatordb.cc b/src/collocatordb.cc
index c611994..eef1860 100644
--- a/src/collocatordb.cc
+++ b/src/collocatordb.cc
@@ -320,9 +320,9 @@
std::shared_ptr<DB> OpenDbForRead(const char *dbname);
- void read_vocab(string fname);
public:
+ void readVocab(string fname);
string getWord(uint32_t w1);
CollocatorDB(const char *db_name, bool read_only);
@@ -461,7 +461,7 @@
inc(encodeCollocation(w1, w2, dist));
}
- void rocksdb::CollocatorDB::read_vocab(string fname) {
+ void rocksdb::CollocatorDB::readVocab(string fname) {
char strbuf[2048];
uint64_t freq;
FILE *fin = fopen(fname.c_str(), "rb");
@@ -496,7 +496,7 @@
total, sentences, sl, avg_window_size);
fclose(fp);
} else {
- std::cout << "size file " << size_fname << " not found\n";
+ // std::cout << "size file " << size_fname << " not found\n";
}
} else {
std::cout << "cannot determine size file " << size_fname << "\n";
@@ -522,7 +522,7 @@
assert(false);
}
vocabname << name << ".vocab";
- read_vocab(vocabname.str());
+ readVocab(vocabname.str());
return std::shared_ptr<DB>(db);
}
@@ -562,6 +562,7 @@
std::cerr << s.ToString() << std::endl;
assert(false);
}
+ total = 1000;
return std::shared_ptr<DB>(db);
}
@@ -857,18 +858,23 @@
db->dump(w1, w2, dist);
}
- void get_collocators(COLLOCATORS *db, uint32_t w1) {
- db->get_collocators(w1);
+ Collocator *get_collocators(COLLOCATORS *db, uint32_t w1) {
+ return &db->get_collocators(w1)[0];
}
- void get_collocation_scores(COLLOCATORS *db, uint32_t w1, uint32_t w2) {
- db->get_collocation_scores(w1, w2);
+ Collocator *get_collocation_scores(COLLOCATORS *db, uint32_t w1, uint32_t w2) {
+ return &db->get_collocation_scores(w1, w2)[0];
}
const char *get_word(COLLOCATORS *db, uint32_t w) {
return strdup(db->getWord(w).c_str());
}
+ void read_vocab(COLLOCATORS *db, char *fname) {
+ std::string fName(fname);
+ db->readVocab(fName);
+ }
+
const char *get_collocators_as_json(COLLOCATORS *db, uint32_t w1) {
return strdup(db->collocators2json(w1, db->get_collocators(w1)).c_str());
}
@@ -876,6 +882,7 @@
const char *get_collocation_scores_as_json(COLLOCATORS *db, uint32_t w1, uint32_t w2) {
return strdup(db->collocators2json(w1, db->get_collocation_scores(w1, w2)).c_str());
}
+
#ifdef __clang__
#pragma clang diagnostic push
#endif
diff --git a/src/collocatordb.h b/src/collocatordb.h
index a033dcb..4e1d412 100644
--- a/src/collocatordb.h
+++ b/src/collocatordb.h
@@ -86,14 +86,35 @@
#else
typedef struct COLLOCATORDB COLLOCATORDB;
+typedef struct {
+ uint32_t w2;
+ uint64_t f2;
+ uint64_t raw;
+ double pmi;
+ double npmi;
+ double llr;
+ double lfmd;
+ double md;
+ uint64_t left_raw;
+ uint64_t right_raw;
+ double left_pmi;
+ double right_pmi;
+ double dice;
+ double logdice;
+ double ldaf;
+ int window;
+ int af_window;
+} Collocator ;
#endif
extern COLLOCATORDB *open_collocatordb(const char *s);
extern COLLOCATORDB *open_collocatordb_for_write(const char *s);
extern void inc_collocator(COLLOCATORDB *db, uint64_t w1, uint64_t w2, int8_t dist);
extern void dump_collocators(COLLOCATORDB *db, uint32_t w1, uint32_t w2, int8_t dist);
-extern void get_collocators(COLLOCATORDB *db, uint32_t w1);
-extern void get_collocation_scores(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
+extern Collocator *get_collocators(COLLOCATORDB *db, uint32_t w1);
+extern Collocator *get_collocation_scores(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
extern char *get_collocators_as_json(COLLOCATORDB *db, uint32_t w1);
extern char *get_collocation_scores_as_json(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
extern char *get_word(COLLOCATORDB *db, uint32_t w1);
+extern void read_vocab(COLLOCATORDB *db, char *fname);
+