w2v-server: show window/column normalized collocators

commit: d64f3f24c4742de9c27f8ca7186e867d82a403c9 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Thu Nov 30 12:07:42 2017 +0100
committer: Marc Kupietz <kupietz@ids-mannheim.de> Thu Nov 30 12:07:42 2017 +0100
tree: bd734c5a669f83dd8fa962a5ea395812e861092d
parent: 580ebdfb8e7f9284eba4fb6fac40e94ed9f5d34d [diff]
diff --git a/templates/index.html.ep b/templates/index.html.ep
index 427fb56..78ec921 100644
--- a/templates/index.html.ep
+++ b/templates/index.html.ep

@@ -48,18 +48,33 @@
 						 ]
 				 } );
 
-         $('#secondtable').DataTable({
+         var t = $('#secondtable').DataTable({
 						 "sScrollY": "800px",
 						 "bScrollCollapse": true,
 						 "bPaginate": false,
 						 "bJQueryUI": true,
 						 "dom": '<"top">rt<"bottom"flp><"clear">',
-						 "aoColumnDefs": [
-								 { "sWidth": "10%", "aTargets": [ -1 ] }
-						 ]
+						 "columnDefs": [
+								 { "searchable": false,
+									 "orderable": false,
+									 "targets": 0
+								 }, 
+								 { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4 ] },
+								 { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 5 ] },
+								 { "orderSequence": [ "desc" ], "targets": [ 3 ] }
+						 ],
+						 "order": [[ 3, 'desc' ]],
 				 } );
+				 t.on( 'order.dt search.dt', function () {
+						 t.column(0, {order:'applied'}).nodes().each( function (cell, i) {
+								 cell.innerHTML = i+1;
+						 } );
+				 } ).draw();
+
 		 });
 
+
+
 		 $(function(){
 				 $("#dropdownoptions").dialog({
 						 title: "Options",
@@ -720,15 +735,15 @@
 									% if($collocators) {
 										<th>#</th>
 										<th align="right" title="The window around the target word that is considered for summation.">w'</th>
-										<th align="right" title="Raw (max.) activation of the collocator in the output layers.">a</th>
-										<th title="Σp(c<sub><small>@</small></sub>) – Sum of the probability approximations that the combination of the target word and the collocator at the relative position @ come from the training corpus. Single approximations can be distorted because of sub-sampling frequent words and the sum cannot itself be interpreted as probability." align="right">Σp</th>
-										<th align="right">Σp/|w|</th>
+										<th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
+										<th title="(c<sub><small>@</small></sub>) – Sum of the probability approximations that the combination of the target word and the collocator at the relative position @ come from the training corpus. Single approximations can be distorted because of sub-sampling frequent words and the sum cannot itself be interpreted as probability." align="right">⊥Σa</th>
+										<th align="right">Σa/Σw</th>
 										<th title="c" align="left">collocator</th>
 									% }
 								</tr>
 							</thead>
 							<tbody>
-								% for(my $i=0; $i < 100; $i++) {
+								% for(my $i=0; $i < (@$collocators); $i++) {
 									% my $c = ($collocators? (@$collocators)[$i] : 0);
 									<tr>
 										<td align="right">
@@ -739,13 +754,13 @@
 												<span class="mono"><%= bitvec2window( $c->{pos} ) %></span>
 											</td>
 											<td align="right">
-												<%= sprintf("%.3f", $c->{dist}) %>
+												<%= sprintf("%.3f", $c->{max}) %>
 											</td>
 											<td align="right">
-												<%= sprintf("%.3e", $c->{norm}) %>
+												<%= sprintf("%.3e", $c->{conorm}) %>
 											</td>
 											<td align="right">
-												<%= sprintf("%.3e", $c->{sum}) %>
+												<%= sprintf("%.3e", $c->{prob}) %>
 											</td>
 											<td align="left">
 												<a onclick="<%= sprintf("queryKorAPCII('%s /w5 %s')", $c->{word}, $word) =%>"

diff --git a/w2v-server.pl b/w2v-server.pl
index 0487d71..38b8cb8 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl

@@ -189,9 +189,10 @@
 	long long wordi;
 	long position;
 	float activation;
-	float activation_sum;
-	float probability_sum;
 	float probability;
+	float activation_sum;
+	float conorm;
+	float max_activation;
 } collocator;
 
 typedef struct {
@@ -794,6 +795,9 @@
 			memcpy(best + b, &syn_nbs[0]->best[b], sizeof(collocator));
       best[b].position = -1; //  syn_nbs[0]->pos[b];
       best[b].activation_sum = target_sums[syn_nbs[0]->best[b].wordi];
+			best[b].max_activation = 0.0;
+			best[b].conorm = 0.0;
+			best[b].probability = 0.0;
     }
 		
 		float best_window_sum[MAX_NEIGHBOURS];
@@ -804,6 +808,9 @@
 					if(best[i].wordi == syn_nbs[a]->best[b].wordi)
 						break;
 				if(i >= found_index) {
+					best[found_index].max_activation = 0.0;
+					best[found_index].conorm = 0.0;
+					best[found_index].probability = 0.0;
 					best[found_index++].wordi = syn_nbs[a]->best[b].wordi;
 					//						printf("found: %s\n", &vocab[syn_nbs[a]->index[b] * max_w]);
 				}
@@ -813,9 +820,9 @@
 			printf("window: %d  -  syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
 			int wpos;
 			for(i=0; i < found_index; i++) {
-				best[i].activation = 0; best[i].probability = 0;
+				best[i].activation = best[i].probability = best[i].conorm = 0;
 				for(w=1; w <  (1 << syn_threads); w++) { // loop through all possible windows
-					float word_window_sum = 0, word_activation_sum = 0, total_window_sum = 0;
+					float word_window_sum = 0, word_window_conorm=0, word_activation_sum = 0, total_window_sum = 0;
 					int bits_set = 0;
 					for(a=0; a < syn_threads; a++) {
 						if((1 << a) & w) {
@@ -831,10 +838,13 @@
 						  for(b=0; b < syn_nbs[a]->length; b++)
 							  if(best[i].wordi == syn_nbs[a]->best[b].wordi) {
 //                  word_window_sum += syn_nbs[a]->dist[b] *  syn_nbs[a]->norm[b]; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
-//                    word_window_sum += syn_nbs[a]->dist[b]; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
 //                    word_window_sum += syn_nbs[a]->norm[b]; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
 //                  word_window_sum = (word_window_sum + syn_nbs[a]->norm[b]) - (word_window_sum * syn_nbs[a]->norm[b]);  // syn_nbs[a]->norm[b];
-                  word_window_sum += syn_nbs[a]->best[b].activation - word_window_sum * syn_nbs[a]->best[b].activation;  // conormalied activation sum
+                  word_window_sum += syn_nbs[a]->best[b].activation; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
+                  word_window_conorm += syn_nbs[a]->best[b].activation - word_window_sum * syn_nbs[a]->best[b].activation;  // conormalized activation sum
+									word_activation_sum += syn_nbs[a]->best[b].activation;
+                  if(syn_nbs[a]->best[b].activation > best[i].max_activation)
+										best[i].max_activation = syn_nbs[a]->best[b].activation;
 									word_activation_sum += syn_nbs[a]->best[b].activation;
 								}
 						}
@@ -843,13 +853,17 @@
 //						word_activation_sum /= bits_set;
 //						word_window_sum /= bits_set;
 //          }
-//          word_window_sum /= total_window_sum;
+          word_window_sum /= total_window_sum;
 
           if(word_window_sum > best[i].probability) {
 						best[i].probability = word_window_sum;
-						best[i].activation = word_activation_sum;
 						best[i].position = w;
 					}
+
+          if(word_window_conorm > best[i].conorm) {
+						best[i].conorm = word_window_conorm;
+						best[i].activation = word_activation_sum;
+					}
 			  }
 			}
 			qsort(best, found_index, sizeof(collocator), cmp_probability);
@@ -927,9 +941,9 @@
       if(latin_enc == 0) SvUTF8_on(word);
       hv_store(hash, "word", strlen("word"), word , 0);
       hv_store(hash, "rank", strlen("rank"), newSVuv(best[a].wordi), 0);
-      hv_store(hash, "dist", strlen("dist"), newSVnv(best[a].activation), 0);
-      hv_store(hash, "norm", strlen("norm"), newSVnv(best[a].probability), 0);
-      hv_store(hash, "sum", strlen("sum"), newSVnv(target_sums[best[a].wordi]), 0);
+      hv_store(hash, "conorm", strlen("conorm"), newSVnv(best[a].conorm), 0);
+      hv_store(hash, "prob", strlen("prob"), newSVnv(best[a].probability), 0);
+      hv_store(hash, "max", strlen("max"), newSVnv(best[a].max_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
       hv_store(hash, "pos", strlen("pos"), newSVnv(best[a].position), 0);
       av_push(array, newRV_noinc((SV*)hash));
     }
commit	d64f3f24c4742de9c27f8ca7186e867d82a403c9	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Nov 30 12:07:42 2017 +0100
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Nov 30 12:07:42 2017 +0100
tree	bd734c5a669f83dd8fa962a5ea395812e861092d
parent	580ebdfb8e7f9284eba4fb6fac40e94ed9f5d34d [diff]