Add some directory structure
Change-Id: I052849c6aff9fd0f311984de67c35a306735c9a9
diff --git a/examples/c_testcdb.c b/examples/c_testcdb.c
new file mode 100644
index 0000000..b143ccd
--- /dev/null
+++ b/examples/c_testcdb.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "../src/collocatordb.h"
+
+/*
+ * Smoke test for the C API: opens /tmp/test.rocksdb for writing, calls
+ * inc_collocator() with (2000, 2000..2002, 4) and then dump_collocators()
+ * with (2000, 0, 0).
+ * Returns 1 if the database cannot be opened, 0 otherwise.
+ */
+int main() {
+  COLLOCATORDB *cdb = open_collocatordb_for_write("/tmp/test.rocksdb");
+  /* Do not hand a null handle to the calls below. */
+  if (!cdb) {
+    fprintf(stderr, "Error opening /tmp/test.rocksdb for writing, exiting.\n");
+    return 1;
+  }
+  inc_collocator(cdb, 2000, 2000, 4);
+  inc_collocator(cdb, 2000, 2001, 4);
+  inc_collocator(cdb, 2000, 2002, 4);
+  dump_collocators(cdb, 2000, 0, 0);
+  return 0;
+}
diff --git a/examples/dumpllr.cc b/examples/dumpllr.cc
new file mode 100644
index 0000000..69a9389
--- /dev/null
+++ b/examples/dumpllr.cc
@@ -0,0 +1,60 @@
+#include <typeinfo>
+#include <assert.h>
+#include <memory>
+#include <iostream>
+#include <stdint.h>
+#include "../src/collocatordb.h"
+#include <thread>
+#include <chrono>
+#include <sstream> // for ostringstream
+
+using namespace rocksdb;
+
+
+// For every word id in [START, STOP) writes one line of "w2 npmi" pairs
+// (or "0 0.0" when there are no collocators) to stdout.
+// argv[1] is passed to CollocatorDB; progress is reported on stderr.
+// Returns 1 when no database argument was given.
+int main(int argc, char** argv) {
+  const uint32_t START=0;
+  const uint32_t STOP=1500000;
+  int done = 0;
+  if(argc < 2) {
+    std::cerr << "Usage: dumpllr <collocatordb>\n";
+    return 1;
+  }
+  CollocatorDB cdb = CollocatorDB(argv[1], true);
+  std::cerr << "Database " << argv[1] << " opened\n";
+
+  #pragma omp parallel for ordered schedule(static,1)
+  for(uint32_t i=START; i< STOP; i++) {
+    // cdb.dumpSparseLlr(i, 5);
+    std::vector<rocksdb::Collocator> cs = cdb.get_collocators(i);
+    std::stringstream stream;
+    // stream << i << "(" << cdb.getWord(i) << "): ";
+    if(cs.empty())
+      stream << "0 0.0";
+    for (const rocksdb::Collocator& c : cs) {
+      stream << c.w2 << " " << c.npmi << " ";
+      // stream << c.w2 << "(" << cdb.getWord(c.w2) << ") " << c.llr << " ";
+      // Stop after the first collocator with raw < 5 (it is still printed).
+      if(c.raw < 5)
+        break;
+    }
+    stream << "\n";
+    // Only the write to stdout is ordered, so lines come out in id order.
+    #pragma omp ordered
+    std::cout << stream.str();
+
+    // `done` is shared by all threads: bump it atomically and work on the
+    // captured value instead of re-reading the shared counter.
+    int finished;
+    #pragma omp atomic capture
+    finished = ++done;
+    if((finished - 1) % 100 == 0) {
+      #pragma omp critical(progress)
+      std::cerr << "\r\033[2K" << "done: " << finished * 100.0 / (STOP-START) << "%" << std::flush;
+    }
+  }
+  std::cout << std::flush;
+}
diff --git a/examples/dumppmicubed.cc b/examples/dumppmicubed.cc
new file mode 100644
index 0000000..8c3bf56
--- /dev/null
+++ b/examples/dumppmicubed.cc
@@ -0,0 +1,69 @@
+#include <typeinfo>
+#include <assert.h>
+#include <memory>
+#include <iostream>
+#include <stdint.h>
+#include <stdio.h>
+#include "../src/collocatordb.h"
+#include <thread>
+#include <chrono>
+#include <sstream> // for ostringstream
+#include <fstream>
+#include <vector>
+
+using namespace rocksdb;
+
+
+// For each word id i in [START, STOP) stores up to PER_WORD collocator ids
+// (w2 values different from i) in row i of a STOP x PER_WORD uint32_t table,
+// then writes the whole table to "file.binary". Unused slots stay 0.
+// argv[1] is passed to CollocatorDB; progress is reported on stderr.
+int main(int argc, char** argv) {
+  const int START=1;
+  const int STOP=300000;
+  const int PER_WORD=20;
+  int done = 0;
+
+  if(argc < 2) {
+    std::cerr << "Usage: dumppmicubed <collocatordb>\n";
+    return 1;
+  }
+  // Zero-filled table; freed automatically on every return path.
+  std::vector<uint32_t> array(static_cast<size_t>(STOP) * PER_WORD);
+  CollocatorDB cdb = CollocatorDB(argv[1], true);
+  std::cerr << "Database " << argv[1] << " opened\n";
+  #pragma omp parallel for schedule(dynamic, 1)
+  for(uint32_t i=START; i< static_cast<uint32_t>(STOP); i++) {
+    std::vector<rocksdb::Collocator> cs = cdb.get_collocators(i, STOP);
+    int j=0;
+    for (const rocksdb::Collocator& c : cs) {
+      if(c.w2 != i) {
+        array[static_cast<size_t>(i)*PER_WORD+j] = static_cast<uint32_t>(c.w2);
+        if(++j >= PER_WORD)
+          break;
+      }
+    }
+    // `done` is shared by all threads: bump it atomically and work on the
+    // captured value instead of re-reading the shared counter.
+    int finished;
+    #pragma omp atomic capture
+    finished = ++done;
+    if((finished - 1) % 100 == 0) {
+      #pragma omp critical(progress)
+      std::cerr << "\r\033[2K" << "done: " << finished * 100.0 / (STOP-START) << "%" << " (todo: " << STOP-START-finished << ")" << std::flush;
+    }
+  }
+  FILE* pFile = fopen("file.binary", "wb");
+  if(pFile == nullptr) {
+    std::cerr << "\nCannot open file.binary for writing\n";
+    return 1;
+  }
+  const size_t written = fwrite(array.data(), sizeof(uint32_t), array.size(), pFile);
+  // fclose() flushes buffered data, so its result matters as well.
+  const bool closed = fclose(pFile) == 0;
+  if(written != array.size() || !closed) {
+    std::cerr << "\nError writing file.binary\n";
+    return 1;
+  }
+  std::cout << std::flush;
+}
diff --git a/examples/hello_world.c b/examples/hello_world.c
new file mode 100644
index 0000000..fb4c5d1
--- /dev/null
+++ b/examples/hello_world.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "../src/collocatordb.h"
+
+/* Path handed to open_collocatordb() by this example. */
+char dbpath[] = "../models/dereko-2021-i";
+/* Word id used in the queries below (Grund). */
+const int testword = 431;
+
+/*
+ * Opens dbpath for reading, prints the association scores between testword
+ * and word 218717, then the collocators of testword as json.
+ * Exits with status 1 when the database cannot be opened.
+ */
+int main(int argc, char* argv[]) {
+  COLLOCATORDB *cdb;
+
+  fprintf(stderr, "opening collocatordb for reading: %s ...\n", dbpath);
+  cdb = open_collocatordb(dbpath);
+  if(cdb == NULL) {
+    fprintf(stderr, "Error opening %s exiting.\n", dbpath);
+    exit(1);
+  }
+  fprintf(stderr, "Successfully opened %s.\n", dbpath);
+
+  printf("associations between two words:\n %s", get_collocation_scores_as_json(cdb, testword, 218717));
+  /*
+  printf("raw dump of all “%s”-neighbour positions and frequencies:\n", get_word(cdb, testword));
+  dump_collocators(cdb, testword, 0, 0);
+  */
+
+  printf("printing collocators of “%s” as json:\n", get_word(cdb, testword));
+  printf("%s\n", get_collocators_as_json(cdb, testword));
+
+  return 0;
+}
diff --git a/examples/testcdb.cc b/examples/testcdb.cc
new file mode 100644
index 0000000..64e9304
--- /dev/null
+++ b/examples/testcdb.cc
@@ -0,0 +1,61 @@
+#include <typeinfo>
+#include <assert.h>
+#include <memory>
+#include <iostream>
+#include <stdint.h>
+#include <stdlib.h>
+#include "../src/collocatordb.h"
+using namespace rocksdb;
+
+// Prints w1, w2, dist and count for every entry the iterator returned by
+// SeekIterator(1000,0,0) yields while it stays valid.
+void dumpDb(Collocators& counters) {
+  auto it = std::unique_ptr<CollocatorIterator>(counters.SeekIterator(1000,0,0));
+  for (; it->isValid(); it->Next()) {
+    const uint64_t value = it->intValue();
+    const uint64_t key = it->intKey();
+    // '\n' instead of std::endl: no stream flush per row.
+    std::cout << "w1:" << W1(key) << ", w2:" << W2(key) << ", dist:" << static_cast<int32_t>(DIST(key)) << " - count:" << value << '\n';
+  }
+  // One flush for the whole dump.
+  std::cout << "ready dumping\n" << std::flush;
+}
+
+// Applies a fixed set of increments plus 10000 pseudo-random ones (rand() is
+// not seeded in this file) to counters, repeats some of the fixed increments
+// and dumps the result via dumpDb().
+void testCollocators(Collocators& counters) {
+  counters.inc(100,200,5);
+  counters.inc(1000,2000,-5);
+  counters.inc(1000,2000,5);
+  counters.inc(1000,2500,-3);
+  counters.inc(1000,2500,4);
+  counters.inc(1000,2900,3);
+
+  counters.inc(1001,2900,3);
+
+  for(int i=0; i<10000; i++)
+    counters.inc(rand()%1010,rand()%1010,rand()%10-5);
+
+  // dumpDb(counters);
+
+  counters.inc(100,200,5);
+  counters.inc(1000,2000,5);
+  counters.inc(1000,2500,4);
+  counters.inc(1000,2900,3);
+
+  counters.inc(1001,2900,3);
+
+  dumpDb(counters);
+  std::cout << "ready testing\n";
+}
+
+
+// Runs testCollocators() on a Collocators object constructed from "/tmp/cdb".
+int main() {
+  Collocators counters = "/tmp/cdb";
+  std::cout << "testing now\n";
+  testCollocators(counters);
+  std::cout << "ready running\n";
+  return 0;
+}