blob: f2497912bd974592215d9580e70b64ee1e18985a [file] [log] [blame]
Marc Kupietz06c9a9f2018-01-02 16:56:43 +01001#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +01002#include <typeinfo>
Marc Kupietz4b799e92018-01-02 11:04:56 +01003#include "rocksdb/db.h"
Marc Kupietz06c9a9f2018-01-02 16:56:43 +01004#endif
5#include <stdint.h>
Marc Kupietz4b799e92018-01-02 11:04:56 +01006
/*
 * Non-zero when the host is big-endian: the two bytes 0x00, 0xff are read
 * back as one uint16_t, which is 0x00ff (< 0x100) only if the first byte is
 * the most significant one.
 */
#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)

/*
 * Collocation key layout (64 bit):
 *   bits 56..63  dist  (decoded by DIST as a signed 8-bit value)
 *   bits 24..55  w2    (W2 reads back the low 24 bits of this field)
 *   bits  0..23  w1    (W1 reads back the low 24 bits)
 *
 * Every macro argument is parenthesised so that expression arguments such as
 * `a | b` or `cond ? x : y` are evaluated as a whole before being cast,
 * shifted or masked.  w1 is OR-ed in unmasked, exactly as before, so callers
 * must keep it within 24 bits.
 */
#define encodeCollocation(w1, w2, dist) ((((uint64_t)(dist)) << 56) | (((uint64_t)(w2)) << 24) | (w1))
#define W1(key) ((uint64_t)((key) & 0xffffff))
#define W2(key) ((uint64_t)(((key) >> 24) & 0xffffff))
#define DIST(key) ((int8_t)((uint64_t)(((key) >> 56) & 0xff)))
12
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010013#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +010014namespace rocksdb {
/*
 * Plain record holding one collocate of a queried word together with its
 * counts and scores.  Every member is public and none has a default
 * initialiser, so a default-initialised object holds indeterminate values
 * until the caller fills it in.  Member order is part of the contract for
 * brace initialisation and must not be changed.
 *
 * NOTE(review): the member names look like common association measures
 * (PMI, normalised PMI, log-likelihood ratio, MD variants, Dice / logDice);
 * the formulas are not visible in this header - confirm in the
 * implementation before relying on that reading.
 */
class Collocator {
 public:
  /* Identifiers and counts. */
  uint64_t w2;   /* presumably the id of the collocate word - TODO confirm */
  uint64_t f2;   /* presumably the frequency of w2 - TODO confirm */
  uint64_t raw;  /* presumably the raw co-occurrence count - TODO confirm */

  /* Scores over the whole window. */
  double pmi;
  double npmi;
  double llr;
  double lfmd;
  double md;

  /* Left / right variants of two of the scores above. */
  double left_lfmd;
  double right_lfmd;
  double left_npmi;
  double right_npmi;

  /* Further scores. */
  double dice;
  double logdice;
  double ldaf;

  /* Window descriptors; encoding is not visible in this header. */
  int window;
  int af_window;
};
35
Marc Kupietz4b799e92018-01-02 11:04:56 +010036 class CollocatorIterator : public Iterator {
37 public:
38 CollocatorIterator(const Iterator& it);
39 void SeekToFirst();
40 void SeekToLast();
41 void Seek(const rocksdb::Slice&);
42 void Prev();
43 bool isValid();
44 uint64_t intValue();
45 uint64_t intKey();
46 };
47
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010048 extern "C" {
Marc Kupietz6aec7682018-01-10 09:47:48 +010049 class CollocatorDB {
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010050 public:
Marc Kupietz3400aa52018-06-05 10:28:55 +020051 std::string getWord(uint32_t w1);
52 std::vector<Collocator> get_collocators(uint32_t w1);
Marc Kupietzbd966192018-10-13 14:14:37 +020053 std::vector<Collocator> get_collocators(uint32_t w1, uint32_t max_w2);
Marc Kupietz3400aa52018-06-05 10:28:55 +020054 void dumpSparseLlr(uint32_t w1, uint32_t min_cooccur);
55 CollocatorDB(const char *db_name, const bool read_only);
Marc Kupietz4b799e92018-01-02 11:04:56 +010056 void inc(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010057 void dump(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz4b799e92018-01-02 11:04:56 +010058 CollocatorIterator* SeekIterator(uint64_t w1, uint64_t w2, int8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010059 };
60
61 }
Marc Kupietz4b799e92018-01-02 11:04:56 +010062}
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010063
Marc Kupietz6aec7682018-01-10 09:47:48 +010064typedef rocksdb::CollocatorDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010065
66#else
Marc Kupietz6aec7682018-01-10 09:47:48 +010067typedef struct COLLOCATORDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010068#endif
69
Marc Kupietz88d116b2021-03-13 18:05:14 +010070extern COLLOCATORDB *open_collocatordb(const char *s);
71extern COLLOCATORDB *open_collocatordb_for_write(const char *s);
Marc Kupietz6aec7682018-01-10 09:47:48 +010072extern void inc_collocator(COLLOCATORDB *db, uint64_t w1, uint64_t w2, int8_t dist);
73extern void dump_collocators(COLLOCATORDB *db, uint32_t w1, uint32_t w2, int8_t dist);
74extern void get_collocators(COLLOCATORDB *db, uint32_t w1);
Marc Kupietz88d116b2021-03-13 18:05:14 +010075extern void get_collocation_scores(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
Marc Kupietz6aec7682018-01-10 09:47:48 +010076extern char *get_collocators_as_json(COLLOCATORDB *db, uint32_t w1);
Marc Kupietz88d116b2021-03-13 18:05:14 +010077extern char *get_collocation_scores_as_json(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
Marc Kupietzca3a52e2018-06-05 14:16:23 +020078extern char *get_word(COLLOCATORDB *db, uint32_t w1);