blob: a033dcbb069d8369ef94251d48e28459bdf7d9cd [file] [log] [blame]
Marc Kupietz06c9a9f2018-01-02 16:56:43 +01001#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +01002#include <typeinfo>
Marc Kupietz4b799e92018-01-02 11:04:56 +01003#include "rocksdb/db.h"
Marc Kupietz06c9a9f2018-01-02 16:56:43 +01004#endif
5#include <stdint.h>
Marc Kupietz4b799e92018-01-02 11:04:56 +01006
/* Evaluates to non-zero on big-endian hosts: the byte pair {0x00, 0xff}
 * read as a uint16_t is 0x00ff (< 0x100) when the most significant byte
 * comes first, and 0xff00 otherwise. */
#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)

/* Packs a collocation into one 64-bit key:
 *   dist is shifted to bit 56 (top byte), w2 is shifted to bit 24,
 *   and w1 is OR-ed into the low bits.
 * W1()/W2() below read back 24 bits each, so ids are expected to fit in
 * 24 bits; w1 is not masked here, larger values spill into the w2 field.
 * Every parameter is parenthesized so that compound arguments such as
 * `a | b` or `x + 1` are packed as a whole. */
#define encodeCollocation(w1, w2, dist) \
  (((uint64_t)(dist) << 56) | ((uint64_t)(w2) << 24) | (w1))

/* Extracts the low 24 bits of a key (the w1 field). */
#define W1(key) ((uint64_t)((key) & 0xffffff))

/* Extracts the 24 bits starting at bit 24 of a key (the w2 field). */
#define W2(key) ((uint64_t)(((key) >> 24) & 0xffffff))

/* Extracts the top byte of a key and reinterprets it as a signed distance. */
#define DIST(key) ((int8_t)((uint64_t)(((key) >> 56) & 0xff)))
12
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010013#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +010014namespace rocksdb {
Marc Kupietz3400aa52018-06-05 10:28:55 +020015 class Collocator {
16 public:
17 uint64_t w2;
Marc Kupietzcc6c4592019-01-23 10:11:23 +010018 uint64_t f2;
Marc Kupietz3400aa52018-06-05 10:28:55 +020019 uint64_t raw;
20 double pmi;
21 double npmi;
22 double llr;
23 double lfmd;
Marc Kupietz41880452019-01-22 15:29:06 +010024 double md;
Marc Kupietz3400aa52018-06-05 10:28:55 +020025 double left_lfmd;
26 double right_lfmd;
27 double left_npmi;
28 double right_npmi;
Marc Kupietz41880452019-01-22 15:29:06 +010029 double dice;
30 double logdice;
Marc Kupietz3203e4c2019-02-04 12:42:45 +010031 double ldaf;
Marc Kupietze9f58932019-01-24 15:12:59 +010032 int window;
33 int af_window;
Marc Kupietz3400aa52018-06-05 10:28:55 +020034 };
Marc Kupietz0421d092021-03-13 18:05:14 +010035namespace rocksdb {
36 class Collocator {
37 public:
38 uint32_t w2;
39 uint64_t f2;
40 uint64_t raw;
41 double pmi;
42 double npmi;
43 double llr;
44 double lfmd;
45 double md;
46 uint64_t left_raw;
47 uint64_t right_raw;
48 double left_pmi;
49 double right_pmi;
50 double dice;
51 double logdice;
52 double ldaf;
53 int window;
54 int af_window;
55 };
Marc Kupietz3400aa52018-06-05 10:28:55 +020056
Marc Kupietz4b799e92018-01-02 11:04:56 +010057 class CollocatorIterator : public Iterator {
58 public:
59 CollocatorIterator(const Iterator& it);
60 void SeekToFirst();
61 void SeekToLast();
62 void Seek(const rocksdb::Slice&);
63 void Prev();
64 bool isValid();
65 uint64_t intValue();
66 uint64_t intKey();
67 };
68
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010069 extern "C" {
Marc Kupietz6aec7682018-01-10 09:47:48 +010070 class CollocatorDB {
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010071 public:
Marc Kupietz3400aa52018-06-05 10:28:55 +020072 std::string getWord(uint32_t w1);
73 std::vector<Collocator> get_collocators(uint32_t w1);
Marc Kupietzbd966192018-10-13 14:14:37 +020074 std::vector<Collocator> get_collocators(uint32_t w1, uint32_t max_w2);
Marc Kupietz3400aa52018-06-05 10:28:55 +020075 void dumpSparseLlr(uint32_t w1, uint32_t min_cooccur);
76 CollocatorDB(const char *db_name, const bool read_only);
Marc Kupietz4b799e92018-01-02 11:04:56 +010077 void inc(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010078 void dump(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz4b799e92018-01-02 11:04:56 +010079 CollocatorIterator* SeekIterator(uint64_t w1, uint64_t w2, int8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010080 };
81
82 }
Marc Kupietz4b799e92018-01-02 11:04:56 +010083}
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010084
Marc Kupietz6aec7682018-01-10 09:47:48 +010085typedef rocksdb::CollocatorDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010086
87#else
Marc Kupietz6aec7682018-01-10 09:47:48 +010088typedef struct COLLOCATORDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010089#endif
90
Marc Kupietz88d116b2021-03-13 18:05:14 +010091extern COLLOCATORDB *open_collocatordb(const char *s);
92extern COLLOCATORDB *open_collocatordb_for_write(const char *s);
Marc Kupietz6aec7682018-01-10 09:47:48 +010093extern void inc_collocator(COLLOCATORDB *db, uint64_t w1, uint64_t w2, int8_t dist);
94extern void dump_collocators(COLLOCATORDB *db, uint32_t w1, uint32_t w2, int8_t dist);
95extern void get_collocators(COLLOCATORDB *db, uint32_t w1);
Marc Kupietz88d116b2021-03-13 18:05:14 +010096extern void get_collocation_scores(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
Marc Kupietz6aec7682018-01-10 09:47:48 +010097extern char *get_collocators_as_json(COLLOCATORDB *db, uint32_t w1);
Marc Kupietz88d116b2021-03-13 18:05:14 +010098extern char *get_collocation_scores_as_json(COLLOCATORDB *db, uint32_t w1, uint32_t w2);
Marc Kupietzca3a52e2018-06-05 14:16:23 +020099extern char *get_word(COLLOCATORDB *db, uint32_t w1);