Marc Kupietz | 06c9a9f | 2018-01-02 16:56:43 +0100 | [diff] [blame] | 1 | #ifdef __cplusplus |
Marc Kupietz | 4b799e9 | 2018-01-02 11:04:56 +0100 | [diff] [blame] | 2 | #include <typeinfo> |
Marc Kupietz | 4b799e9 | 2018-01-02 11:04:56 +0100 | [diff] [blame] | 3 | #include "rocksdb/db.h" |
Marc Kupietz | 06c9a9f | 2018-01-02 16:56:43 +0100 | [diff] [blame] | 4 | #endif |
| 5 | #include <stdint.h> |
Marc Kupietz | 4b799e9 | 2018-01-02 11:04:56 +0100 | [diff] [blame] | 6 | |
/*
 * Byte-order probe: reads the two bytes 0x00 0xFF as one uint16_t.
 * A big-endian host sees 0x00FF (< 0x100, so true); a little-endian host
 * sees 0xFF00 (false).
 * NOTE(review): this type-puns a string literal through a uint16_t pointer;
 * it relies on the literal being suitably aligned - confirm on strict targets.
 */
#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)

/*
 * 64-bit collocation key helpers.
 *
 * Layout produced by encodeCollocation():
 *   bits  0..   : w1   (W1 reads back bits 0-23)
 *   bits 24..   : w2   (W2 reads back bits 24-47)
 *   bits 56-63  : dist (DIST reads it back as a signed 8-bit value)
 *
 * encodeCollocation() does not mask its inputs, so a w1 wider than 24 bits
 * overlaps the w2 field; callers are expected to pass values that fit.
 *
 * Every macro parameter and every expansion is fully parenthesised so that
 * expression arguments (e.g. W1(a | b)) are evaluated as a unit.
 */
#define encodeCollocation(w1, w2, dist) \
  (((uint64_t)(dist) << 56) | ((uint64_t)(w2) << 24) | (uint64_t)(w1))
#define W1(key) ((uint64_t)((key) & 0xffffff))
#define W2(key) ((uint64_t)(((key) >> 24) & 0xffffff))
#define DIST(key) ((int8_t)((uint64_t)(((key) >> 56) & 0xff)))
| 12 | |
Marc Kupietz | c8ddf45 | 2018-01-07 21:33:12 +0100 | [diff] [blame] | 13 | |
/*
 * One vocabulary record: a 64-bit count paired with a C string.
 * NOTE(review): freq is presumably the corpus frequency of `word`, and
 * ownership of the `word` buffer is not visible from this header - confirm.
 */
typedef struct vocab_entry {
  uint64_t freq; /* count associated with this entry */
  char *word;    /* pointer to the entry's text */
} vocab_entry;
| 18 | |
Marc Kupietz | 06c9a9f | 2018-01-02 16:56:43 +0100 | [diff] [blame] | 19 | #ifdef __cplusplus |
Marc Kupietz | 4b799e9 | 2018-01-02 11:04:56 +0100 | [diff] [blame] | 20 | namespace rocksdb { |
| 21 | class CollocatorIterator : public Iterator { |
| 22 | public: |
| 23 | CollocatorIterator(const Iterator& it); |
| 24 | void SeekToFirst(); |
| 25 | void SeekToLast(); |
| 26 | void Seek(const rocksdb::Slice&); |
| 27 | void Prev(); |
| 28 | bool isValid(); |
| 29 | uint64_t intValue(); |
| 30 | uint64_t intKey(); |
| 31 | }; |
| 32 | |
Marc Kupietz | 06c9a9f | 2018-01-02 16:56:43 +0100 | [diff] [blame] | 33 | extern "C" { |
| 34 | class Collocators { |
| 35 | public: |
Marc Kupietz | 4b799e9 | 2018-01-02 11:04:56 +0100 | [diff] [blame] | 36 | Collocators(const char *db_name); |
| 37 | ~Collocators(); |
| 38 | void inc(const uint32_t w1, const uint32_t w2, const uint8_t dist); |
Marc Kupietz | 06c9a9f | 2018-01-02 16:56:43 +0100 | [diff] [blame] | 39 | void dump(const uint32_t w1, const uint32_t w2, const uint8_t dist); |
Marc Kupietz | 4b799e9 | 2018-01-02 11:04:56 +0100 | [diff] [blame] | 40 | CollocatorIterator* SeekIterator(uint64_t w1, uint64_t w2, int8_t dist); |
Marc Kupietz | 06c9a9f | 2018-01-02 16:56:43 +0100 | [diff] [blame] | 41 | }; |
| 42 | |
| 43 | } |
Marc Kupietz | 4b799e9 | 2018-01-02 11:04:56 +0100 | [diff] [blame] | 44 | } |
Marc Kupietz | 06c9a9f | 2018-01-02 16:56:43 +0100 | [diff] [blame] | 45 | |
| 46 | typedef rocksdb::Collocators COLLOCATORS; |
| 47 | |
| 48 | #else |
| 49 | typedef struct COLLOCATORS COLLOCATORS; |
| 50 | #endif |
| 51 | |
| 52 | extern COLLOCATORS *open_collocators(char *s); |
Marc Kupietz | 6bb2776 | 2018-01-09 17:53:01 +0100 | [diff] [blame^] | 53 | extern COLLOCATORS *open_collocators_for_read(char *s); |
Marc Kupietz | 06c9a9f | 2018-01-02 16:56:43 +0100 | [diff] [blame] | 54 | extern void inc_collocators(COLLOCATORS *db, uint64_t w1, uint64_t w2, int8_t dist); |
| 55 | extern void dump_collocators(COLLOCATORS *db, uint32_t w1, uint32_t w2, int8_t dist); |
Marc Kupietz | c8ddf45 | 2018-01-07 21:33:12 +0100 | [diff] [blame] | 56 | extern void get_collocators(COLLOCATORS *db, uint32_t w1, vocab_entry *vocab, uint64_t total); |
| 57 | extern char *get_collocators_as_json(COLLOCATORS *db, uint32_t w1, vocab_entry *vocab, uint64_t total); |
| 58 | |