blob: dae2047485b6650eceaf9ef67e642ea67a7d790a [file] [log] [blame]
Marc Kupietz06c9a9f2018-01-02 16:56:43 +01001#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +01002#include <typeinfo>
Marc Kupietz4b799e92018-01-02 11:04:56 +01003#include "rocksdb/db.h"
Marc Kupietz06c9a9f2018-01-02 16:56:43 +01004#endif
5#include <stdint.h>
Marc Kupietz4b799e92018-01-02 11:04:56 +01006
/*
 * Endianness probe. The literal holds the bytes 0x00 0xff; read as a single
 * uint16_t they give 0x00ff (< 0x100) when the first byte is the most
 * significant one, and 0xff00 otherwise.
 */
#define IS_BIG_ENDIAN (*(const uint16_t *)"\0\xff" < 0x100)

/*
 * Packs a collocation into one 64-bit key:
 *   dist is shifted to bit 56, w2 to bit 24, w1 occupies the low bits.
 * The arguments are NOT masked, so a value wider than its field spills into
 * the neighbouring field; W1()/W2() read back 24 bits each, DIST() 8 bits.
 * Every macro parameter is parenthesized so that expression arguments
 * (e.g. `a | b`) are treated as a single operand.
 */
#define encodeCollocation(w1, w2, dist) \
  (((uint64_t)(dist) << 56) | ((uint64_t)(w2) << 24) | (uint64_t)(w1))

/* Extracts the low 24 bits of a key (the w1 field). */
#define W1(key) ((uint64_t)((key) & 0xffffff))

/* Extracts bits 24..47 of a key (the w2 field). */
#define W2(key) ((uint64_t)(((key) >> 24) & 0xffffff))

/* Extracts the top 8 bits of a key and reinterprets them as a signed distance. */
#define DIST(key) ((int8_t)((uint64_t)(((key) >> 56) & 0xff)))
12
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010013#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +010014namespace rocksdb {
// Result record for a single collocate: three integer fields followed by
// eleven floating-point scores. It is a plain data holder with public fields
// and no constructors or initializers, so a default-initialized object holds
// indeterminate values. Field order is part of the contract for brace
// initialization and must not be changed.
class Collocator {
public:
  // -- identifiers and counts --
  uint64_t w2;   // presumably the word id of the collocate -- confirm against the implementation
  uint64_t f2;   // presumably the frequency of w2 -- confirm
  uint64_t raw;  // presumably the raw co-occurrence count -- confirm

  // -- association scores over the whole window --
  // NOTE(review): the names suggest PMI, normalized PMI, log-likelihood ratio
  // and mutual-dependency variants; the formulas are not visible in this header.
  double pmi;
  double npmi;
  double llr;
  double lfmd;
  double md;

  // -- the same kind of scores split into a left and a right part --
  double left_lfmd;
  double right_lfmd;
  double left_npmi;
  double right_npmi;

  // -- Dice-style scores (by name; confirm against the implementation) --
  double dice;
  double logdice;
};
32
Marc Kupietz4b799e92018-01-02 11:04:56 +010033 class CollocatorIterator : public Iterator {
34 public:
35 CollocatorIterator(const Iterator& it);
36 void SeekToFirst();
37 void SeekToLast();
38 void Seek(const rocksdb::Slice&);
39 void Prev();
40 bool isValid();
41 uint64_t intValue();
42 uint64_t intKey();
43 };
44
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010045 extern "C" {
Marc Kupietz6aec7682018-01-10 09:47:48 +010046 class CollocatorDB {
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010047 public:
Marc Kupietz3400aa52018-06-05 10:28:55 +020048 std::string getWord(uint32_t w1);
49 std::vector<Collocator> get_collocators(uint32_t w1);
Marc Kupietzbd966192018-10-13 14:14:37 +020050 std::vector<Collocator> get_collocators(uint32_t w1, uint32_t max_w2);
Marc Kupietz3400aa52018-06-05 10:28:55 +020051 void dumpSparseLlr(uint32_t w1, uint32_t min_cooccur);
52 CollocatorDB(const char *db_name, const bool read_only);
Marc Kupietz4b799e92018-01-02 11:04:56 +010053 void inc(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010054 void dump(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz4b799e92018-01-02 11:04:56 +010055 CollocatorIterator* SeekIterator(uint64_t w1, uint64_t w2, int8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010056 };
57
58 }
Marc Kupietz4b799e92018-01-02 11:04:56 +010059}
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010060
Marc Kupietz6aec7682018-01-10 09:47:48 +010061typedef rocksdb::CollocatorDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010062
63#else
/* C translation units: COLLOCATORDB is an incomplete (opaque) struct type that
 * can only be handled through pointers. */
struct COLLOCATORDB;
typedef struct COLLOCATORDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010065#endif
66
Marc Kupietz6aec7682018-01-10 09:47:48 +010067extern COLLOCATORDB *open_collocatordb(char *s);
68extern COLLOCATORDB *open_collocatordb_for_write(char *s);
69extern void inc_collocator(COLLOCATORDB *db, uint64_t w1, uint64_t w2, int8_t dist);
70extern void dump_collocators(COLLOCATORDB *db, uint32_t w1, uint32_t w2, int8_t dist);
71extern void get_collocators(COLLOCATORDB *db, uint32_t w1);
72extern char *get_collocators_as_json(COLLOCATORDB *db, uint32_t w1);
Marc Kupietzca3a52e2018-06-05 14:16:23 +020073extern char *get_word(COLLOCATORDB *db, uint32_t w1);