Marc Kupietz | 3400aa5 | 2018-06-05 10:28:55 +0200 | [diff] [blame] | 1 | #include <iostream> |
Marc Kupietz | 934a972 | 2024-11-21 14:33:49 +0100 | [diff] [blame^] | 2 | #include <vector> |
| 3 | #include <cstdint> |
| 4 | #include <string> |
Marc Kupietz | 3400aa5 | 2018-06-05 10:28:55 +0200 | [diff] [blame] | 5 | #include <sstream> // for ostringstream |
| 6 | |
Marc Kupietz | 934a972 | 2024-11-21 14:33:49 +0100 | [diff] [blame^] | 7 | #include <rocksdb/cache.h> |
| 8 | #include <thread> |
| 9 | #include "../src/collocatordb.h" |
Marc Kupietz | 3400aa5 | 2018-06-05 10:28:55 +0200 | [diff] [blame] | 10 | using namespace rocksdb; |
| 11 | |
Marc Kupietz | 934a972 | 2024-11-21 14:33:49 +0100 | [diff] [blame^] | 12 | |
Marc Kupietz | 3400aa5 | 2018-06-05 10:28:55 +0200 | [diff] [blame] | 13 | int main(int argc, char** argv) { |
| 14 | const int START=0; |
| 15 | const int STOP=1500000; |
| 16 | int done = 0; |
| 17 | CollocatorDB cdb = CollocatorDB(argv[1], true); |
| 18 | std::cerr << "Database " << argv[1] << " opened\n"; |
| 19 | |
| 20 | #pragma omp parallel for ordered schedule(static,1) |
| 21 | for(uint32_t i=START; i< STOP; i++) { |
| 22 | // cdb.dumpSparseLlr(i, 5); |
| 23 | std::vector<rocksdb::Collocator> cs = cdb.get_collocators(i); |
| 24 | std::stringstream stream; |
| 25 | // stream << i << "(" << cdb.getWord(i) << "): "; |
| 26 | if(cs.empty()) |
| 27 | stream << "0 0.0"; |
| 28 | for (rocksdb::Collocator c : cs) { |
| 29 | stream << c.w2 << " " << c.npmi << " "; |
| 30 | // stream << c.w2 << "(" << cdb.getWord(c.w2) << ") " << c.llr << " "; |
| 31 | if(c.raw < 5) |
| 32 | break; |
| 33 | } |
| 34 | stream << "\n"; |
| 35 | #pragma omp ordered |
| 36 | std::cout << stream.str(); |
| 37 | if(done++ % 100 == 0) { |
| 38 | std::cerr <<"\r\033[2K"<<std::flush; |
| 39 | std::cerr << "done: " << done * 100.0 / (STOP-START) << "%" <<std::flush; |
| 40 | } |
| 41 | } |
| 42 | std::cout << std::flush; |
| 43 | } |