blob: 56d99057571fbd3f00709316653e6c6298e36015 [file] [log] [blame]
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define __USE_XOPEN_EXTENDED
#include <ftw.h>
#include "../src/collocatordb.h"
#include "acutest.h"
7
// Location of the fixture database opened by the read tests; the path is
// relative, so it is resolved against the current working directory.
char dbpath[] = "../tests/data/wpd19_10000";

// Word id used by the lookup tests; test_get_word expects it to map to "ist".
const int testword = 10;
10
11void test_open_db() {
12 COLLOCATORDB *cdb;
13
14 cdb = open_collocatordb(dbpath);
15 TEST_ASSERT(cdb != NULL);
16}
17
18void test_get_word() {
19 COLLOCATORDB *cdb;
20
21 cdb = open_collocatordb(dbpath);
22 TEST_ASSERT(cdb != NULL);
23 char *word = get_word(cdb, testword);
24 char *expected = "ist";
25 TEST_CHECK(strcmp(word, expected) == 0);
26 TEST_MSG("Expected: %s", expected);
27 TEST_MSG("Produced: %s", word);
28}
29
30void test_collocation_scores() {
31 COLLOCATORDB *cdb;
32
33 cdb = open_collocatordb(dbpath);
34 TEST_ASSERT(cdb != NULL);
35 char *expected = " { \"f1\": 217,\"w1\":\"Aluminium\", \"N\": 152744, \"collocates\": [{\"word\":\"Anwendungstechnologie\",\"f2\":16,\"f\":16,\"npmi\":0.594849,\"pmi\":8.4592,\"llr\":188.227,\"lfmd\":16.4592,\"md\":12.4592,\"dice\":0.0711111,\"ld\":10.1862,\"ln_count\":16,\"rn_count\":0,\"ln_pmi\":9.4592,\"rn_pmi\":-1,\"ldaf\":11.1358,\"win\":32,\"afwin\":32}]}\n";
36 char *produced = get_collocation_scores_as_json(cdb, 62, 966);
37 TEST_CHECK(strcmp(produced, expected) == 0);
38 TEST_MSG("Expected: %s", expected);
39 TEST_MSG("Produced: %s", produced);
40}
41
42
43void test_collocation_analysis_as_json() {
44 COLLOCATORDB *cdb;
45
46 cdb = open_collocatordb(dbpath);
47 TEST_ASSERT(cdb != NULL);
48 char *json = get_collocators_as_json(cdb, testword);
49 char *needle = "\"word\":\"um\",\"f2\":264,\"f\":5,\"npmi\":-0.0556343,\"pmi\":-0.958064,\"llr\":2.87717,\"lfmd\":3.68579,\"md\":1.36386,\"dice\":0.00169952,\"ld\":4.79935,\"ln_count\":0,\"rn_count\":1,\"ln_pmi\":-1,\"rn_pmi\":-1,\"ldaf\":4.79935,\"win\":668,\"afwin\":668";
50 TEST_CHECK(strstr(json, needle) > 0);
51 TEST_MSG("Expected to contain: %s", needle);
Marc Kupietz6663f112021-03-14 09:20:59 +010052}
53
54void test_collocation_analysis() {
55 COLLOCATORDB *cdb;
56
57 cdb = open_collocatordb(dbpath);
58 TEST_ASSERT(cdb != NULL);
59 char *expected = "Anwendungstechnologie";
60 const COLLOCATOR *c = get_collocators(cdb, 62);
61 char *produced = get_word(cdb,c[0].w2);
62 TEST_CHECK(strcmp(produced, expected) == 0);
63 TEST_MSG("Expected: %s", expected);
64 TEST_MSG("Produced: %s", produced);
Marc Kupietz6663f112021-03-14 09:20:59 +010065}
66
// Tree-walk callback (used with nftw() by rmrf) that deletes the entry it is
// handed.
//
// fpath    - path of the entry to delete
// sb, typeflag, ftwbuf - required by the callback signature, not used here
//
// Returns the result of remove(): 0 on success, non-zero on failure. On
// failure the reason is reported on stderr via perror(). Per POSIX, a
// non-zero return from an nftw() callback stops the walk.
int unlink_cb(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
  (void)sb;
  (void)typeflag;
  (void)ftwbuf;

  const int status = remove(fpath);

  if (status != 0) {
    perror(fpath);
  }
  return status;
}
73
74int rmrf(char *path) {
75 return nftw(path, unlink_cb, 64, FTW_DEPTH | FTW_PHYS);
76}
77
78void test_writing() {
79 char *tmp = tempnam(NULL, NULL);
80 long size = 0;
Marc Kupietz673bd812021-03-14 17:27:44 +010081 int i;
Marc Kupietz6663f112021-03-14 09:20:59 +010082
83 char *rocksdbfn = malloc(strlen(tmp)+strlen(".rocksdb"));
84 strcpy (rocksdbfn, tmp);
85 strcat(rocksdbfn, ".rocksdb");
86 COLLOCATORDB *cdb = open_collocatordb_for_write(rocksdbfn);
87
88 char *vocabfn = malloc(strlen(tmp)+strlen(".vocab"));
89 strcpy(vocabfn, tmp);
90 strcat(vocabfn, ".vocab");
91 FILE *h = fopen(vocabfn, "w");
92 fprintf(h, "word0 2000\n");
93 fprintf(h, "word1 2000\n");
94 fprintf(h, "word2 2000\n");
95 fclose(h);
96 read_vocab(cdb, vocabfn);
97 inc_collocator(cdb, 0, 1, 4); size++;
Marc Kupietz673bd812021-03-14 17:27:44 +010098 for (i=0; i < 1000; i++) {
Marc Kupietz6663f112021-03-14 09:20:59 +010099 inc_collocator(cdb, 0, 1, i % 5); size++;
100 inc_collocator(cdb, 0, 1, -i % 5); size++;
101 inc_collocator(cdb, 1, 0, i % 5); size++;
102 inc_collocator(cdb, 1, 0, -i % 5); size++;
103 inc_collocator(cdb, 0, 2, i % 5); size++;
104 inc_collocator(cdb, 0, 2, -i % 5); size++;
105 }
106 inc_collocator(cdb, 1, 2, 4); size++;
107 COLLOCATOR *c = get_collocators(cdb, 0);
108 TEST_ASSERT(c != NULL);
Marc Kupietz1b09e4d2021-03-14 15:20:19 +0100109 TEST_CHECK(c[0].w2 == 1);
Marc Kupietz6663f112021-03-14 09:20:59 +0100110 TEST_CHECK(c[0].raw == 2001);
111 TEST_CHECK(c[0].left_raw == 200);
112 TEST_CHECK(c[0].right_raw == 200);
113
114 rmrf(rocksdbfn);
115}
116
117TEST_LIST = {
118 { "open database for reading", test_open_db },
119 { "get word", test_get_word },
120 { "collocation scores", test_collocation_scores },
121 { "collocation analysis", test_collocation_analysis },
122 { "collocation analysis as json", test_collocation_analysis_as_json },
123 { "writing", test_writing },
124 { NULL, NULL }
125};