blob: 70c6c11a7a78ce65d5d2c7b4afe7390ab240095f [file] [log] [blame]
Marc Kupietz06c9a9f2018-01-02 16:56:43 +01001#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +01002#include <typeinfo>
Marc Kupietz4b799e92018-01-02 11:04:56 +01003#include "rocksdb/db.h"
Marc Kupietz06c9a9f2018-01-02 16:56:43 +01004#endif
5#include <stdint.h>
Marc Kupietz4b799e92018-01-02 11:04:56 +01006
/* Non-zero when the host is big-endian: the byte pair 0x00 0xff is read back
 * as one uint16_t, which is 0x00ff (< 0x100) only if the first byte is the
 * most significant one. */
#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)

/* Packs a collocation into a single 64-bit key:
 *   dist -> bits 56 and up, w2 -> bits 24 and up, w1 -> low bits.
 * Nothing is masked on the way in, so a w1 wider than 24 bits overlaps the
 * w2 field. Each parameter is parenthesised so that arbitrary expressions
 * (e.g. `a | b`, `c ? x : y`) can be passed without precedence surprises. */
#define encodeCollocation(w1, w2, dist) \
  (((uint64_t)(dist) << 56) | ((uint64_t)(w2) << 24) | (uint64_t)(w1))

/* Extracts the low 24 bits of a key (the w1 field). */
#define W1(key) ((uint64_t)((key) & 0xffffff))

/* Extracts bits 24..47 of a key (the low 24 bits of the w2 field). */
#define W2(key) ((uint64_t)(((key) >> 24) & 0xffffff))

/* Extracts the top byte of a key and reinterprets it as a signed distance. */
#define DIST(key) ((int8_t)((uint64_t)(((key) >> 56) & 0xff)))
12
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010013#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +010014namespace rocksdb {
// Plain record describing one collocate and its association scores.
// All members are public and left uninitialised by default; the class has no
// constructors, so member order matters for brace initialisation.
// NOTE(review): the per-field meanings below are inferred from the member
// names only — confirm against the code that fills this record.
class Collocator {
 public:
  uint64_t w2;   // presumably the id of the collocate word
  uint64_t raw;  // presumably the raw co-occurrence count

  // Association measures (names suggest PMI, normalised PMI,
  // log-likelihood ratio, and mutual-dependency variants).
  double pmi;
  double npmi;
  double llr;
  double lfmd;
  double md;

  // Side-specific variants of lfmd / npmi.
  double left_lfmd;
  double right_lfmd;
  double left_npmi;
  double right_npmi;

  // Dice-based measures.
  double dice;
  double logdice;
};
31
Marc Kupietz4b799e92018-01-02 11:04:56 +010032 class CollocatorIterator : public Iterator {
33 public:
34 CollocatorIterator(const Iterator& it);
35 void SeekToFirst();
36 void SeekToLast();
37 void Seek(const rocksdb::Slice&);
38 void Prev();
39 bool isValid();
40 uint64_t intValue();
41 uint64_t intKey();
42 };
43
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010044 extern "C" {
Marc Kupietz6aec7682018-01-10 09:47:48 +010045 class CollocatorDB {
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010046 public:
Marc Kupietz3400aa52018-06-05 10:28:55 +020047 std::string getWord(uint32_t w1);
48 std::vector<Collocator> get_collocators(uint32_t w1);
Marc Kupietzbd966192018-10-13 14:14:37 +020049 std::vector<Collocator> get_collocators(uint32_t w1, uint32_t max_w2);
Marc Kupietz3400aa52018-06-05 10:28:55 +020050 void dumpSparseLlr(uint32_t w1, uint32_t min_cooccur);
51 CollocatorDB(const char *db_name, const bool read_only);
Marc Kupietz4b799e92018-01-02 11:04:56 +010052 void inc(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010053 void dump(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz4b799e92018-01-02 11:04:56 +010054 CollocatorIterator* SeekIterator(uint64_t w1, uint64_t w2, int8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010055 };
56
57 }
Marc Kupietz4b799e92018-01-02 11:04:56 +010058}
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010059
Marc Kupietz6aec7682018-01-10 09:47:48 +010060typedef rocksdb::CollocatorDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010061
62#else
/* C view of the handle: an incomplete struct type, usable only via pointer. */
typedef struct COLLOCATORDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010064#endif
65
Marc Kupietz6aec7682018-01-10 09:47:48 +010066extern COLLOCATORDB *open_collocatordb(char *s);
67extern COLLOCATORDB *open_collocatordb_for_write(char *s);
68extern void inc_collocator(COLLOCATORDB *db, uint64_t w1, uint64_t w2, int8_t dist);
69extern void dump_collocators(COLLOCATORDB *db, uint32_t w1, uint32_t w2, int8_t dist);
70extern void get_collocators(COLLOCATORDB *db, uint32_t w1);
71extern char *get_collocators_as_json(COLLOCATORDB *db, uint32_t w1);
Marc Kupietzca3a52e2018-06-05 14:16:23 +020072extern char *get_word(COLLOCATORDB *db, uint32_t w1);