/* Source blob: ad9f0c698d36a3da11167057a8f202b84fa508d8 */
#ifdef __cplusplus
#include <string>
#include <typeinfo>
#include <vector>
#include "rocksdb/db.h"
#endif
#include <stdint.h>

/*
 * Non-zero when the host is big-endian.
 *
 * The two bytes {0x00, 0xff} are read back as one uint16_t: a big-endian
 * host sees 0x00ff (< 0x100), a little-endian host sees 0xff00.
 * NOTE(review): this reads a string literal through a uint16_t pointer,
 * which assumes the literal is suitably aligned and that the compiler
 * tolerates the type pun -- confirm for the supported toolchains.
 */
#define IS_BIG_ENDIAN (*(const uint16_t *)"\0\xff" < 0x100)

/*
 * Pack a collocation triple into one 64-bit key:
 *   bits 56..63  dist
 *   bits 24..55  w2 (shifted left by 24)
 *   bits  0..    w1 (OR-ed in unshifted and unmasked)
 *
 * Every argument is parenthesised so that arbitrary expressions (bitwise
 * operators, ?:, ...) can be passed without precedence surprises. w1 is
 * deliberately not masked, exactly as before: a w1 wider than 24 bits
 * spills into the w2 field.
 */
#define encodeCollocation(w1, w2, dist) \
  ((((uint64_t)(dist)) << 56) | (((uint64_t)(w2)) << 24) | (w1))

/* Low 24 bits of a key: the w1 field. */
#define W1(key) ((uint64_t)((key) & 0xffffff))

/* Bits 24..47 of a key: the (24-bit masked) w2 field. */
#define W2(key) ((uint64_t)(((key) >> 24) & 0xffffff))

/* Top byte of a key, reinterpreted as a signed 8-bit distance. */
#define DIST(key) ((int8_t)((uint64_t)(((key) >> 56) & 0xff)))

Marc Kupietz06c9a9f2018-01-02 16:56:43 +010013#ifdef __cplusplus
Marc Kupietz4b799e92018-01-02 11:04:56 +010014namespace rocksdb {
Marc Kupietz3400aa52018-06-05 10:28:55 +020015 class Collocator {
16 public:
17 uint64_t w2;
18 uint64_t raw;
19 double pmi;
20 double npmi;
21 double llr;
22 double lfmd;
23 double fpmi;
24 double left_lfmd;
25 double right_lfmd;
26 double left_npmi;
27 double right_npmi;
28 };
29
Marc Kupietz4b799e92018-01-02 11:04:56 +010030 class CollocatorIterator : public Iterator {
31 public:
32 CollocatorIterator(const Iterator& it);
33 void SeekToFirst();
34 void SeekToLast();
35 void Seek(const rocksdb::Slice&);
36 void Prev();
37 bool isValid();
38 uint64_t intValue();
39 uint64_t intKey();
40 };
41
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010042 extern "C" {
Marc Kupietz6aec7682018-01-10 09:47:48 +010043 class CollocatorDB {
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010044 public:
Marc Kupietz3400aa52018-06-05 10:28:55 +020045 std::string getWord(uint32_t w1);
46 std::vector<Collocator> get_collocators(uint32_t w1);
Marc Kupietzbd966192018-10-13 14:14:37 +020047 std::vector<Collocator> get_collocators(uint32_t w1, uint32_t max_w2);
Marc Kupietz3400aa52018-06-05 10:28:55 +020048 void dumpSparseLlr(uint32_t w1, uint32_t min_cooccur);
49 CollocatorDB(const char *db_name, const bool read_only);
Marc Kupietz4b799e92018-01-02 11:04:56 +010050 void inc(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010051 void dump(const uint32_t w1, const uint32_t w2, const uint8_t dist);
Marc Kupietz4b799e92018-01-02 11:04:56 +010052 CollocatorIterator* SeekIterator(uint64_t w1, uint64_t w2, int8_t dist);
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010053 };
54
55 }
Marc Kupietz4b799e92018-01-02 11:04:56 +010056}
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010057
Marc Kupietz6aec7682018-01-10 09:47:48 +010058typedef rocksdb::CollocatorDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010059
60#else
Marc Kupietz6aec7682018-01-10 09:47:48 +010061typedef struct COLLOCATORDB COLLOCATORDB;
Marc Kupietz06c9a9f2018-01-02 16:56:43 +010062#endif
63
Marc Kupietz6aec7682018-01-10 09:47:48 +010064extern COLLOCATORDB *open_collocatordb(char *s);
65extern COLLOCATORDB *open_collocatordb_for_write(char *s);
66extern void inc_collocator(COLLOCATORDB *db, uint64_t w1, uint64_t w2, int8_t dist);
67extern void dump_collocators(COLLOCATORDB *db, uint32_t w1, uint32_t w2, int8_t dist);
68extern void get_collocators(COLLOCATORDB *db, uint32_t w1);
69extern char *get_collocators_as_json(COLLOCATORDB *db, uint32_t w1);
Marc Kupietzca3a52e2018-06-05 14:16:23 +020070extern char *get_word(COLLOCATORDB *db, uint32_t w1);