w2v-server: show all collected collocators

commit: 50485ba5cb4b983003c00628278715cae45d5f62 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Wed Mar 23 09:13:14 2016 +0100
committer: Marc Kupietz <kupietz@ids-mannheim.de> Thu Mar 31 21:26:24 2016 +0200
tree: 4901197d6eea00001dbf8d1d56234cf339e758f0
parent: 271e2a46a072d1f7eca0afc9c956cc8cadc8f750 [diff] [blame]
diff --git a/w2v-server.pl b/w2v-server.pl
index ec33052..5a24fdb 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl

@@ -420,8 +420,7 @@
 SV *get_neighbours(char *st1, int N) {
   HV *result = newHV();
 	float bestd[MAX_NEIGHBOURS], bestn[MAX_NEIGHBOURS], vec[max_size];
-	long long besti[MAX_NEIGHBOURS], bestp[MAX_NEIGHBOURS], a, b, c, d, slice;
-	char *bestw[MAX_NEIGHBOURS];
+	long besti[MAX_NEIGHBOURS], bestp[MAX_NEIGHBOURS], a, b, c, d, slice;
 	knn *para_nbs[MAX_THREADS];
 	knn *syn_nbs[MAX_THREADS];
 	knnpars pars[MAX_THREADS];
@@ -486,12 +485,8 @@
 
   AV* array = newAV();
   for (a = 0; a < N; a++) {
-    bestw[a] = (char *)malloc(max_size * sizeof(char));
-  }
-  for (a = 0; a < N; a++) {
-    strcpy(bestw[a], &vocab[besti[a] * max_w]);
     HV* hash = newHV();
-    SV* word = newSVpvf(bestw[a], 0);
+    SV* word = newSVpvf(&vocab[besti[a] * max_w], 0);
     if(latin_enc == 0) SvUTF8_on(word);
     hv_store(hash, "word", strlen("word"), word , 0);
     hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
@@ -505,13 +500,21 @@
   }
   hv_store(result, "paradigmatic", strlen("paradigmatic"), newRV_noinc((SV*)array), 0);
   
+  for(b=0; b < MAX_NEIGHBOURS; b++) {
+    besti[b] = -1L;
+    bestd[b] = 0;
+    bestn[b] = 0;
+    bestp[b] = 0;
+  }
+
   printf("Waiting for syn threads to join\n");
   fflush(stdout);
   for (a = 0; a < syn_threads; a++) pthread_join(pt[a+para_threads], &syn_nbs[a]);
   printf("syn threads joint\n");
   fflush(stdout);
   
-  for(b=0; b < N; b++) {
+
+  for(b=0; b < syn_nbs[0]->length; b++) {
     besti[b] = syn_nbs[0]->index[b];
     bestd[b] = syn_nbs[0]->dist[b];
     bestn[b] = syn_nbs[0]->norm[b];
@@ -520,8 +523,8 @@
   
   
   for(a=1; a < syn_threads; a++) {
-    for(b=0; b < N; b++) {
-      for(c=0; c < N; c++) {
+    for(b=0; b < syn_nbs[a]->length; b++) {
+      for(c=0; c < MAX_NEIGHBOURS; c++) {
         if(syn_nbs[a]->dist[b] > bestd[c]) {
           for(d=N-1; d>c; d--) {
             bestd[d] = bestd[d-1];
@@ -539,10 +542,9 @@
     }
   }
   array = newAV();
-  for (a = 0; a < N && besti[a] >= 0; a++) {
-    strcpy(bestw[a], &vocab[besti[a] * max_w]);
+  for (a = 0; a < MAX_NEIGHBOURS && besti[a] >= 0; a++) {
     HV* hash = newHV();
-    SV* word = newSVpvf(bestw[a], 0);
+    SV* word = newSVpvf(&vocab[besti[a] * max_w], 0);
     if(latin_enc == 0) SvUTF8_on(word);
     hv_store(hash, "word", strlen("word"), word , 0);
     hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
@@ -841,17 +843,20 @@
 				<th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th><th title="Position in winodw around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title="&#34;Responsivenes&#34; of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ come from the corpus.">resp.</th><th title="Probability of the collocator at window location @."align="right">p(c<sub><small>@</small></sub>)</th><th align="left">syntagmatic</th>
 			</tr>
 			% my $j=0; my @words; my @vecs; my @ranks; for my $list (@$lists) {
-			% my $i=0; for my $item (@$list) {
-      % my $c = (@$collocators)[$i];      
-			% if(!grep{$_ eq $item->{word}} @words) {
-      %   push @vecs, $item->{vector};
-			%   push @words, $item->{word};
-			%   push @ranks, $item->{rank};
-      % }
+			% my $i=0; while(1) {
+      % my $item = (@$list)[$i];
+      % my $c = (@$collocators)[$i];
+      % last if(!$c && !$item);
 			<tr>
 				<td align="right">
   				<%= ++$i %>.
 				</td>
+        % if($item) {
+			  % if(!grep{$_ eq $item->{word}} @words) {
+        %   push @vecs, $item->{vector};
+			  %   push @words, $item->{word};
+			  %   push @ranks, $item->{rank};
+        % }
 				<td align="right">
   				<%= sprintf("%.3f", $item->{dist}) %>
 				</td>
@@ -860,6 +865,9 @@
 						<%= $item->{word} %>
 					</a>
 				</td>
+        % } else {
+          <td colspan="2"/>
+        % }
         % if($c) {
 				<td align="right">
           <%= $c->{pos} %>:
commit	50485ba5cb4b983003c00628278715cae45d5f62	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Wed Mar 23 09:13:14 2016 +0100
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Mar 31 21:26:24 2016 +0200
tree	4901197d6eea00001dbf8d1d56234cf339e758f0
parent	271e2a46a072d1f7eca0afc9c956cc8cadc8f750 [diff] [blame]