Marc Kupietz | c8ddf45 | 2018-01-07 21:33:12 +0100 | [diff] [blame] | 1 | #include <stdio.h> |
| 2 | #include <stdlib.h> |
| 3 | #include <string.h> |
| 4 | #include <math.h> |
| 5 | #include "collocatordb.h" |
| 6 | |
| 7 | uint64_t total=0; |
| 8 | |
| 9 | vocab_entry vocab[100000]; |
| 10 | |
| 11 | void read_vocab(char *fname) { |
| 12 | char strbuf[2048]; |
| 13 | long long freq; |
| 14 | FILE *fin = fopen(fname, "rb"); |
| 15 | if (fin == NULL) { |
| 16 | printf("Vocabulary file not found\n"); |
| 17 | exit(1); |
| 18 | } |
| 19 | uint64_t i = 0; |
| 20 | while(!feof(fin)) { |
| 21 | fscanf(fin, "%s %lld", strbuf, &freq); |
| 22 | vocab[i].word = strdup(strbuf); |
| 23 | vocab[i].freq = freq; |
| 24 | total += freq; |
| 25 | i++; |
| 26 | } |
| 27 | fclose(fin); |
| 28 | } |
| 29 | |
| 30 | int main() { |
Marc Kupietz | 6bb2776 | 2018-01-09 17:53:01 +0100 | [diff] [blame^] | 31 | COLLOCATORS *cdb = open_collocators_for_read("/vol/work/kupietz/Work2/kl/trunk/Analysemethoden/wang2vec/sample"); |
Marc Kupietz | c8ddf45 | 2018-01-07 21:33:12 +0100 | [diff] [blame] | 32 | read_vocab("/vol/work/kupietz/Work2/kl/trunk/Analysemethoden/wang2vec/sample.vocab"); |
Marc Kupietz | 6bb2776 | 2018-01-09 17:53:01 +0100 | [diff] [blame^] | 33 | for(int i=100; i < 1000; i++) |
Marc Kupietz | c8ddf45 | 2018-01-07 21:33:12 +0100 | [diff] [blame] | 34 | get_collocators(cdb, i, vocab, total); |
| 35 | printf("%s\n", get_collocators_as_json(cdb, 500, vocab, total)); |
| 36 | return 0; |
| 37 | } |