blob: eb0471d77759075bdd39b778577398cbac22fa5a [file] [log] [blame]
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define __USE_XOPEN_EXTENDED
#include <ftw.h>
#include "../src/collocatordb.h"
#include "acutest.h"
7
/* Word id used by the lookup tests; test_get_word expects it to map to "ist". */
const int testword = 10;

/* Relative path of the fixture database opened by the read-only tests. */
char dbpath[] = "../tests/data/wpd19_10000";
10
11void test_open_db() {
Marc Kupietz5ffc4742024-11-15 15:45:12 +010012 COLLOCATORDB* cdb = open_collocatordb(dbpath);
Marc Kupietz6663f112021-03-14 09:20:59 +010013 TEST_ASSERT(cdb != NULL);
14}
15
16void test_get_word() {
Marc Kupietz5ffc4742024-11-15 15:45:12 +010017 COLLOCATORDB* cdb = open_collocatordb(dbpath);
Marc Kupietz6663f112021-03-14 09:20:59 +010018 TEST_ASSERT(cdb != NULL);
19 char *word = get_word(cdb, testword);
20 char *expected = "ist";
21 TEST_CHECK(strcmp(word, expected) == 0);
22 TEST_MSG("Expected: %s", expected);
23 TEST_MSG("Produced: %s", word);
24}
25
26void test_collocation_scores() {
Marc Kupietz5ffc4742024-11-15 15:45:12 +010027 COLLOCATORDB* cdb = open_collocatordb(dbpath);
Marc Kupietz6663f112021-03-14 09:20:59 +010028 TEST_ASSERT(cdb != NULL);
„feldmueller“2441f7c2024-11-14 16:31:30 +010029 char *expected = " { \"f1\": 217,\"w1\":\"Aluminium\", \"N\": 152743, \"collocates\": [{\"word\":\"Anwendungstechnologie\",\"f2\":16,\"f\":16,\"npmi\":0.594849,\"pmi\":8.4592,\"llr\":188.227,\"lfmd\":16.4592,\"md\":12.4592,\"dice\":0.0711111,\"ld\":10.1862,\"ln_count\":16,\"rn_count\":0,\"ln_pmi\":9.4592,\"rn_pmi\":-1,\"ldaf\":11.1358,\"win\":32,\"afwin\":32}]}\n";
Marc Kupietz6663f112021-03-14 09:20:59 +010030 char *produced = get_collocation_scores_as_json(cdb, 62, 966);
31 TEST_CHECK(strcmp(produced, expected) == 0);
32 TEST_MSG("Expected: %s", expected);
33 TEST_MSG("Produced: %s", produced);
34}
35
36
37void test_collocation_analysis_as_json() {
Marc Kupietz5ffc4742024-11-15 15:45:12 +010038 COLLOCATORDB* cdb = open_collocatordb(dbpath);
Marc Kupietz6663f112021-03-14 09:20:59 +010039 TEST_ASSERT(cdb != NULL);
40 char *json = get_collocators_as_json(cdb, testword);
„feldmueller“2441f7c2024-11-14 16:31:30 +010041 char *needle = "\"word\":\"um\",\"f2\":264,\"f\":5,\"npmi\":-0.0556349,\"pmi\":-0.958074,\"llr\":2.87723,\"lfmd\":3.68578,\"md\":1.36385,\"dice\":0.00169952,\"ld\":4.79935,\"ln_count\":0,\"rn_count\":1,\"ln_pmi\":-1,\"rn_pmi\":-1,\"ldaf\":4.79935,\"win\":668,\"afwin\":668";
Marc Kupietz6663f112021-03-14 09:20:59 +010042 TEST_CHECK(strstr(json, needle) > 0);
43 TEST_MSG("Expected to contain: %s", needle);
Marc Kupietz6663f112021-03-14 09:20:59 +010044}
45
46void test_collocation_analysis() {
Marc Kupietz5ffc4742024-11-15 15:45:12 +010047 COLLOCATORDB* cdb = open_collocatordb(dbpath);
Marc Kupietz6663f112021-03-14 09:20:59 +010048 TEST_ASSERT(cdb != NULL);
49 char *expected = "Anwendungstechnologie";
50 const COLLOCATOR *c = get_collocators(cdb, 62);
51 char *produced = get_word(cdb,c[0].w2);
52 TEST_CHECK(strcmp(produced, expected) == 0);
53 TEST_MSG("Expected: %s", expected);
54 TEST_MSG("Produced: %s", produced);
Marc Kupietz6663f112021-03-14 09:20:59 +010055}
56
/*
 * nftw() callback that deletes the visited entry.
 *
 * Only fpath is used; the remaining parameters exist to match the callback
 * signature. Returns the result of remove(): 0 on success, non-zero on
 * failure, in which case the error is also reported through perror().
 */
int unlink_cb(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
  (void)sb;
  (void)typeflag;
  (void)ftwbuf;

  const int status = remove(fpath);
  if (status != 0) {
    perror(fpath);
  }
  return status;
}
63
64int rmrf(char *path) {
65 return nftw(path, unlink_cb, 64, FTW_DEPTH | FTW_PHYS);
66}
67
68void test_writing() {
69 char *tmp = tempnam(NULL, NULL);
70 long size = 0;
Marc Kupietz673bd812021-03-14 17:27:44 +010071 int i;
Marc Kupietz6663f112021-03-14 09:20:59 +010072
73 char *rocksdbfn = malloc(strlen(tmp)+strlen(".rocksdb"));
74 strcpy (rocksdbfn, tmp);
75 strcat(rocksdbfn, ".rocksdb");
76 COLLOCATORDB *cdb = open_collocatordb_for_write(rocksdbfn);
77
78 char *vocabfn = malloc(strlen(tmp)+strlen(".vocab"));
79 strcpy(vocabfn, tmp);
80 strcat(vocabfn, ".vocab");
81 FILE *h = fopen(vocabfn, "w");
82 fprintf(h, "word0 2000\n");
83 fprintf(h, "word1 2000\n");
84 fprintf(h, "word2 2000\n");
85 fclose(h);
86 read_vocab(cdb, vocabfn);
87 inc_collocator(cdb, 0, 1, 4); size++;
Marc Kupietz673bd812021-03-14 17:27:44 +010088 for (i=0; i < 1000; i++) {
Marc Kupietz6663f112021-03-14 09:20:59 +010089 inc_collocator(cdb, 0, 1, i % 5); size++;
90 inc_collocator(cdb, 0, 1, -i % 5); size++;
91 inc_collocator(cdb, 1, 0, i % 5); size++;
92 inc_collocator(cdb, 1, 0, -i % 5); size++;
93 inc_collocator(cdb, 0, 2, i % 5); size++;
94 inc_collocator(cdb, 0, 2, -i % 5); size++;
95 }
96 inc_collocator(cdb, 1, 2, 4); size++;
97 COLLOCATOR *c = get_collocators(cdb, 0);
98 TEST_ASSERT(c != NULL);
Marc Kupietz1b09e4d2021-03-14 15:20:19 +010099 TEST_CHECK(c[0].w2 == 1);
Marc Kupietz6663f112021-03-14 09:20:59 +0100100 TEST_CHECK(c[0].raw == 2001);
101 TEST_CHECK(c[0].left_raw == 200);
102 TEST_CHECK(c[0].right_raw == 200);
103
104 rmrf(rocksdbfn);
105}
106
107TEST_LIST = {
108 { "open database for reading", test_open_db },
109 { "get word", test_get_word },
110 { "collocation scores", test_collocation_scores },
111 { "collocation analysis", test_collocation_analysis },
112 { "collocation analysis as json", test_collocation_analysis_as_json },
113 { "writing", test_writing },
114 { NULL, NULL }
115};