w2v-server.pl: utf8 now default encoding

* use -i option for latin1 models
* new command line syntax: perl w2v-server.pl [-i] [-l ADDRESS] [-p PORT] [MODEL_FILE]

e.g. perl w2v-server.pl -p 5676 models/wpdro.vecs
diff --git a/w2v-server.pl b/w2v-server.pl
index 2d1b874..b515c6b 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -3,17 +3,29 @@
 use Mojolicious::Lite;
 use Mojo::JSON qw(decode_json encode_json to_json);
 use Encode qw(decode encode);
+use Getopt::Std;
 use Mojo::Server::Daemon;
 plugin 'Log::Access';
 
+our $opt_i = 0;     # -i: boolean flag, model is latin1 (ISO-8859-1) instead of UTF-8
+our $opt_l = undef; # -l ADDRESS: listen address; when unset the daemon listens on '*'
+our $opt_p = 5676;  # -p PORT: listen port
+
+getopts('il:p:');   # getopts (not getopt): -i takes no value, -l and -p each take one
+
 print STDERR $ARGV[1];
 # -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 40 -binary 1 -iter 15
-if(!$ARGV[1]) {
-  init_net("vectors15.bin");
+if(!$ARGV[0]) {
+  init_net("vectors15.bin", ($opt_i? 1 : 0));
 } else {
-  init_net($ARGV[1]);
+  init_net($ARGV[0], ($opt_i? 1 : 0));
 }
 
+my $daemon = Mojo::Server::Daemon->new(
+    app    => app,
+    listen => ['http://'.($opt_l ? $opt_l : '*').":$opt_p"]
+);
+
 get '/' => sub {
   my $c    = shift;
 	my $word=$c->param('word');
@@ -29,14 +41,18 @@
 		$word =~ s/\s+/ /g;
     for my $w (split(' *\| *', $word)) {
 			$c->app->log->debug('Looking for neighbours of '.$w);
-			push(@lists, get_neighbours(encode("iso-8859-1", $w), $no_nbs));
+      if($opt_i) {
+        push(@lists, get_neighbours(encode("iso-8859-1", $w), $no_nbs));
+      } else {
+        push(@lists, get_neighbours($w, $no_nbs));
+      }
 		}
 	}
 	$word =~ s/ *\| */ | /g;
   $c->render(template=>"index", word=>$word, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, lists=> \@lists);
 };
 
-app->start;
+$daemon->run; # app->start;
 
 exit;
 
@@ -78,8 +94,9 @@
 char *vocab;
 long long words, size;
 int num_threads=20;
+int latin_enc=0;
 
-int init_net(char *file_name) {
+int init_net(char *file_name, int latin) {
   FILE *f, *binvecs, *binwords;
 	int binwords_fd, binvecs_fd;
 	long long a, b, c, d, cn;
@@ -91,6 +108,7 @@
 	strcpy(binvecs_fname, file_name);
 	strcat(binvecs_fname, ".vecs");
 
+  latin_enc = latin;
   f = fopen(file_name, "rb");
   if (f == NULL) {
     printf("Input file %s not found\n", file_name);
@@ -292,7 +310,9 @@
 		for (a = 0; a < N; a++) {
 			strcpy(bestw[a], &vocab[besti[a] * max_w]);
 			HV* hash = newHV();
-			hv_store(hash, "word", strlen("word"), newSVpvf(bestw[a], 0), 0);
+      SV* word = newSVpvf(bestw[a], 0);
+      if(latin_enc == 0) SvUTF8_on(word);
+			hv_store(hash, "word", strlen("word"), word , 0);
 			hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
 			hv_store(hash, "rank", strlen("rank"), newSVuv(besti[a]), 0);
 			AV *vector = newAV();