blob: d58dbe6031dbf6a6d16bcfa0cf587f50307bb2cf [file] [log] [blame]
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define __USE_XOPEN_EXTENDED
#include <ftw.h>
#include "../src/collocatordb.h"
#include "acutest.h"
7
/* Path handed to open_collocatordb() by the read-only tests below. */
char dbpath[] = "../tests/data/wpd19_10000";

/* Word id looked up by the tests; test_get_word expects it to map to "ist". */
const int testword = 10; // ist
10
11void test_open_db() {
12 COLLOCATORDB *cdb;
13
14 cdb = open_collocatordb(dbpath);
15 TEST_ASSERT(cdb != NULL);
16}
17
18void test_get_word() {
19 COLLOCATORDB *cdb;
20
21 cdb = open_collocatordb(dbpath);
22 TEST_ASSERT(cdb != NULL);
23 char *word = get_word(cdb, testword);
24 char *expected = "ist";
25 TEST_CHECK(strcmp(word, expected) == 0);
26 TEST_MSG("Expected: %s", expected);
27 TEST_MSG("Produced: %s", word);
28}
29
30void test_collocation_scores() {
31 COLLOCATORDB *cdb;
32
33 cdb = open_collocatordb(dbpath);
34 TEST_ASSERT(cdb != NULL);
35 char *expected = " { \"f1\": 217,\"w1\":\"Aluminium\", \"N\": 152744, \"collocates\": [{\"word\":\"Anwendungstechnologie\",\"f2\":16,\"f\":16,\"npmi\":0.594849,\"pmi\":8.4592,\"llr\":188.227,\"lfmd\":16.4592,\"md\":12.4592,\"dice\":0.0711111,\"ld\":10.1862,\"ln_count\":16,\"rn_count\":0,\"ln_pmi\":9.4592,\"rn_pmi\":-1,\"ldaf\":11.1358,\"win\":32,\"afwin\":32}]}\n";
36 char *produced = get_collocation_scores_as_json(cdb, 62, 966);
37 TEST_CHECK(strcmp(produced, expected) == 0);
38 TEST_MSG("Expected: %s", expected);
39 TEST_MSG("Produced: %s", produced);
40}
41
42
43void test_collocation_analysis_as_json() {
44 COLLOCATORDB *cdb;
45
46 cdb = open_collocatordb(dbpath);
47 TEST_ASSERT(cdb != NULL);
48 char *json = get_collocators_as_json(cdb, testword);
49 char *needle = "\"word\":\"um\",\"f2\":264,\"f\":5,\"npmi\":-0.0556343,\"pmi\":-0.958064,\"llr\":2.87717,\"lfmd\":3.68579,\"md\":1.36386,\"dice\":0.00169952,\"ld\":4.79935,\"ln_count\":0,\"rn_count\":1,\"ln_pmi\":-1,\"rn_pmi\":-1,\"ldaf\":4.79935,\"win\":668,\"afwin\":668";
50 TEST_CHECK(strstr(json, needle) > 0);
51 TEST_MSG("Expected to contain: %s", needle);
52 const COLLOCATOR *c = get_collocators(cdb, 62);
53 printf("%s %lu\n", get_word(cdb,c[0].w2), c[0].f2);
54}
55
56void test_collocation_analysis() {
57 COLLOCATORDB *cdb;
58
59 cdb = open_collocatordb(dbpath);
60 TEST_ASSERT(cdb != NULL);
61 char *expected = "Anwendungstechnologie";
62 const COLLOCATOR *c = get_collocators(cdb, 62);
63 char *produced = get_word(cdb,c[0].w2);
64 TEST_CHECK(strcmp(produced, expected) == 0);
65 TEST_MSG("Expected: %s", expected);
66 TEST_MSG("Produced: %s", produced);
67 printf("%s\n", get_collocators_as_json(cdb,62));
68
69}
70
/*
 * nftw() callback: removes the entry at `fpath`.
 * On failure the error is reported through perror() with the path as
 * prefix. Returns the result of remove(), so a non-zero value is handed
 * back to the tree walk.
 * The remaining callback arguments are not used.
 */
int unlink_cb(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
  (void)sb;
  (void)typeflag;
  (void)ftwbuf;

  const int status = remove(fpath);
  if (status != 0) {
    perror(fpath);
  }
  return status;
}
77
78int rmrf(char *path) {
79 return nftw(path, unlink_cb, 64, FTW_DEPTH | FTW_PHYS);
80}
81
82void test_writing() {
83 char *tmp = tempnam(NULL, NULL);
84 long size = 0;
85
86 printf("%s\n", acutest_argv0_);
87
88 char *rocksdbfn = malloc(strlen(tmp)+strlen(".rocksdb"));
89 strcpy (rocksdbfn, tmp);
90 strcat(rocksdbfn, ".rocksdb");
91 COLLOCATORDB *cdb = open_collocatordb_for_write(rocksdbfn);
92
93 char *vocabfn = malloc(strlen(tmp)+strlen(".vocab"));
94 strcpy(vocabfn, tmp);
95 strcat(vocabfn, ".vocab");
96 FILE *h = fopen(vocabfn, "w");
97 fprintf(h, "word0 2000\n");
98 fprintf(h, "word1 2000\n");
99 fprintf(h, "word2 2000\n");
100 fclose(h);
101 read_vocab(cdb, vocabfn);
102 inc_collocator(cdb, 0, 1, 4); size++;
103 for (int i=0; i < 1000; i++) {
104 inc_collocator(cdb, 0, 1, i % 5); size++;
105 inc_collocator(cdb, 0, 1, -i % 5); size++;
106 inc_collocator(cdb, 1, 0, i % 5); size++;
107 inc_collocator(cdb, 1, 0, -i % 5); size++;
108 inc_collocator(cdb, 0, 2, i % 5); size++;
109 inc_collocator(cdb, 0, 2, -i % 5); size++;
110 }
111 inc_collocator(cdb, 1, 2, 4); size++;
112 COLLOCATOR *c = get_collocators(cdb, 0);
113 TEST_ASSERT(c != NULL);
114 TEST_CHECK(c[0].raw == 2001);
115 TEST_CHECK(c[0].left_raw == 200);
116 TEST_CHECK(c[0].right_raw == 200);
117
118 rmrf(rocksdbfn);
119}
120
121TEST_LIST = {
122 { "open database for reading", test_open_db },
123 { "get word", test_get_word },
124 { "collocation scores", test_collocation_scores },
125 { "collocation analysis", test_collocation_analysis },
126 { "collocation analysis as json", test_collocation_analysis_as_json },
127 { "writing", test_writing },
128 { NULL, NULL }
129};