word2vec: web-app
diff --git a/w2v-server.pl b/w2v-server.pl
new file mode 100644
index 0000000..2767360
--- /dev/null
+++ b/w2v-server.pl
@@ -0,0 +1,164 @@
+#!/usr/local/bin/perl
+use Inline C;
+use Mojolicious::Lite;
+
+# Load the model once at startup.  The vector file is taken from $ARGV[1];
+# presumably $ARGV[0] is the Mojolicious command (e.g. "daemon") consumed by
+# app->start below -- confirm against how the script is launched.
+my $model_file   = $ARGV[1];
+my $model_loaded = defined $model_file && init_net($model_file) == 0;
+warn "w2v-server: no model loaded, neighbour queries are disabled\n"
+    unless $model_loaded;
+
+# Template helper wrapping the C function of the same name.  The query is
+# whitespace-normalised first, so the C side only ever receives non-empty
+# words separated by single spaces; empty queries produce no output at all.
+helper print_neighbours => sub {
+    my ($c, $query) = @_;
+    return '<pre>Model not loaded.</pre>' unless $model_loaded;
+
+    my $words = join ' ', split ' ', $query // '';
+    return '' unless length $words;
+    return print_neighbours($words);
+};
+
+# Front page: renders the search form and the neighbours of ?word=... .
+get '/' => sub {
+    my ($c) = @_;
+    my $word = $c->param('word');
+    $c->render(template => 'index', word => $word);
+};
+
+app->start;
+
+exit;
+
+__END__
+
+__C__
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <malloc.h>
+#include <stdlib.h> //strlen
+
+// Capacity of the fixed buffers below: file_name, each row of st, vec, and
+// every bestw[] string allocated in init_net.
+#define max_size 2000
+// Bytes reserved per vocabulary word inside vocab.
+#define max_w 50
+// Number of nearest neighbours kept in bestw/bestd and listed in the output.
+#define N 75
+
+//the thread function
+// NOTE(review): only declared here; no definition or caller is visible in the code shown.
+void *connection_handler(void *);
+
+// Neighbour words of the last query (buffers are allocated in init_net).
+char *bestw[N];
+// st holds the query split into words, one row per word; file_name is
+// shadowed by the init_net parameter and not otherwise used in the code shown.
+char file_name[max_size], st[100][max_size];
+// dist/len: scratch score and vector norm; bestd: score of each bestw entry;
+// vec: summed and normalised vector of the query words.
+float dist, len, bestd[N], vec[max_size];
+// words/size: vocabulary size and vector length read from the model header;
+// a, b, c, d: shared loop counters; cn: number of query words;
+// bi: vocabulary index of each query word (-1 when not found).
+long long words, size, a, b, c, d, cn, bi[100];
+// Not referenced by the code shown.
+char ch;
+// Word vectors, words * size floats, each scaled to unit length by init_net.
+float *M;
+// Vocabulary strings, max_w bytes per word, indexed as vocab[i * max_w].
+char *vocab;
+// Output buffer (64000 bytes, allocated in init_net) returned by print_neighbours.
+char *stringBuffer;
+
+/*
+ * Load a binary word2vec model into the global tables.
+ *
+ * The file starts with two decimal numbers (vocabulary size and vector
+ * length).  Each entry is then a word, one space, and `size` raw floats.
+ * Words are stored in vocab (max_w bytes per slot, always NUL-terminated,
+ * longer words are cut) and vectors in M, scaled to unit length.
+ *
+ * file_name: path of the model file.
+ * Returns 0 on success and -1 on failure, after printing a message to
+ * stdout.  On failure the globals describe an empty model (words == 0,
+ * M == NULL, vocab == NULL).
+ */
+int init_net(char *file_name) {
+  FILE *f;
+  int ch; /* int rather than char so that EOF cannot be mistaken for data */
+
+  /* Output buffers are allocated first so that they exist even when the
+     model itself cannot be loaded. */
+  stringBuffer = malloc(64000);
+  if (stringBuffer == NULL) {
+    printf("Cannot allocate output buffers\n");
+    return -1;
+  }
+  for (a = 0; a < N; a++) {
+    bestw[a] = (char *)malloc(max_size * sizeof(char));
+    if (bestw[a] == NULL) {
+      printf("Cannot allocate output buffers\n");
+      return -1;
+    }
+  }
+  f = fopen(file_name, "rb");
+  if (f == NULL) {
+    printf("Input file %s not found\n", file_name);
+    return -1;
+  }
+  /* vec[] is a fixed array of max_size floats, so longer vectors cannot be
+     handled; reject them together with unreadable or non-positive sizes. */
+  if (fscanf(f, "%lld", &words) != 1 || fscanf(f, "%lld", &size) != 1 ||
+      words <= 0 || size <= 0 || size > max_size) {
+    printf("Invalid header in %s\n", file_name);
+    goto fail;
+  }
+  vocab = (char *)malloc((long long)words * max_w * sizeof(char));
+  M = (float *)malloc((long long)words * (long long)size * sizeof(float));
+  if (M == NULL || vocab == NULL) {
+    printf("Cannot allocate memory: %lld MB %lld %lld\n", (long long)(words * size * sizeof(float) / 1048576), words, size);
+    goto fail;
+  }
+  for (b = 0; b < words; b++) {
+    char *word = &vocab[b * max_w];
+
+    /* Read up to the separating space.  Newlines are skipped (presumably
+       the line ends after the header and after each vector) and bytes
+       that would not fit next to the terminating NUL are dropped. */
+    a = 0;
+    while ((ch = fgetc(f)) != EOF && ch != ' ') {
+      if (a < max_w - 1 && ch != '\n') word[a++] = (char)ch;
+    }
+    word[a] = 0;
+    if (fread(&M[b * size], sizeof(float), (size_t)size, f) != (size_t)size) {
+      printf("Unexpected end of file in %s\n", file_name);
+      goto fail;
+    }
+    len = 0;
+    for (a = 0; a < size; a++) len += M[a + b * size] * M[a + b * size];
+    len = sqrt(len);
+    /* An all-zero vector has no direction; keep it as is instead of
+       turning it into NaNs. */
+    if (len > 0) {
+      for (a = 0; a < size; a++) M[a + b * size] /= len;
+    }
+  }
+  fclose(f);
+  return 0;
+
+fail:
+  /* Release the partial model and leave the globals describing an empty one. */
+  fclose(f);
+  free(M);
+  free(vocab);
+  M = NULL;
+  vocab = NULL;
+  words = 0;
+  return -1;
+}
+
+char *print_neighbours(char *st1) {
+ FILE *out=stdout;
+ *stringBuffer=0;
+
+ for (a = 0; a < N; a++) bestd[a] = 0;
+ for (a = 0; a < N; a++) bestw[a][0] = 0;
+ a = 0;
+ cn = 0;
+ b = 0;
+ c = 0;
+ while (1) {
+ st[cn][b] = st1[c];
+ b++;
+ c++;
+ st[cn][b] = 0;
+ if (st1[c] == 0) break;
+ if (st1[c] == ' ') {
+ cn++;
+ b = 0;
+ c++;
+ }
+ }
+ cn++;
+ for (a = 0; a < cn; a++) {
+ for (b = 0; b < words; b++) if (!strcmp(&vocab[b * max_w], st[a])) break;
+ if (b == words) b = -1;
+ bi[a] = b;
+ sprintf(stringBuffer, "\n<pre>Word: \"%s\" Position in vocabulary: %lld</pre>\n", st[a], bi[a]);
+ if (b == -1) {
+ sprintf(stringBuffer+strlen(stringBuffer), "Out of dictionary word!\n");
+ break;
+ }
+ }
+ if (b == -1) return stringBuffer;
+ sprintf(stringBuffer+strlen(stringBuffer), "\n<table><tr><th>Word</th><th>Cosine distance</th></tr>\n");
+ for (a = 0; a < size; a++) vec[a] = 0;
+ for (b = 0; b < cn; b++) {
+ if (bi[b] == -1) continue;
+ for (a = 0; a < size; a++) vec[a] += M[a + bi[b] * size];
+ }
+ len = 0;
+ for (a = 0; a < size; a++) len += vec[a] * vec[a];
+ len = sqrt(len);
+ for (a = 0; a < size; a++) vec[a] /= len;
+ for (a = 0; a < N; a++) bestd[a] = -1;
+ for (a = 0; a < N; a++) bestw[a][0] = 0;
+ for (c = 0; c < words; c++) {
+ a = 0;
+ for (b = 0; b < cn; b++) if (bi[b] == c) a = 1;
+ if (a == 1) continue;
+ dist = 0;
+ for (a = 0; a < size; a++) dist += vec[a] * M[a + c * size];
+ for (a = 0; a < N; a++) {
+ if (dist > bestd[a]) {
+ for (d = N - 1; d > a; d--) {
+ bestd[d] = bestd[d - 1];
+ strcpy(bestw[d], bestw[d - 1]);
+ }
+ bestd[a] = dist;
+ strcpy(bestw[a], &vocab[c * max_w]);
+ break;
+ }
+ }
+ }
+ for (a = 0; a < N; a++) sprintf(stringBuffer+strlen(stringBuffer), "<tr><td>%s</td><td align=\"right\">%f</td></tr>\n", bestw[a], bestd[a]);
+ sprintf(stringBuffer+strlen(stringBuffer), "</table>\n");
+ return stringBuffer;
+}
+
+__DATA__
+
+@@ index.html.ep
+<!DOCTYPE html>
+<html>
+<head><title>word2vec</title></head>
+<body>
+ <%# Search form; submits the word back to this page as the GET parameter "word". %>
+ <form action="<%=url_for('/')->to_abs%>" method="GET">
+ Word: <input type="text" name="word">
+ <input type="submit" value="Show neighbours">
+ </form>
+ <br>
+ <%# Raw output (==) because the helper already returns an HTML fragment. %>
+ <%== print_neighbours($word) %>
+</body>
+</html>
+