Improve token filtering in leaf nodes (fixes #168)

Change-Id: Ia53ebbe28c1f4ed861a48159c6bc88ad7dece3de
diff --git a/Changes b/Changes
index 9b9c06f..50b2458 100755
--- a/Changes
+++ b/Changes
@@ -1,6 +1,8 @@
-0.46 2022-07-06
+0.46 2022-08-08
         - Increased max query length from 1000 to 4096 characters. (kupietz)
         - Fix handling of public clients. (diewald)
+        - Improve support for non-Latin tokens in annotation table views.
+          (Fixes #168; diewald)
 
 0.45 2022-05-20
         - Added confidential client support to OAuth. (diewald)
diff --git a/dev/js/spec/matchSpec.js b/dev/js/spec/matchSpec.js
index effe8d4..0d1615f 100644
--- a/dev/js/spec/matchSpec.js
+++ b/dev/js/spec/matchSpec.js
@@ -222,6 +222,54 @@
       "</span>" +
       "<span class=\"context-right\"></span>";
 
+  var snippetDiffToken = "<span class=\"context-left\"><\/span>"+
+      "<span class=\"match\">" +
+      "<span class=\"cutted\"><\/span>" +
+      "Außerdem " +
+      "<span title=\"cnx/l:meist\">" +
+      "  <span title=\"cnx/p:ADV\">" +
+      "    <span title=\"cnx/syn:@PREMOD\">" +
+      "      <span title=\"mate/l:meist\">" +
+      "        <span title=\"mate/p:ADV\">" +
+      "          <span title=\"opennlp/p:ADV\">Диес</span>" +
+      "        </span>" +
+      "      </span>" +
+      "    </span>" +
+      "  </span>" +
+      "</span>" +
+      "<mark>" + 
+      "<span title=\"cnx/l:deutlich\">" +
+      "  <span title=\"cnx/p:A\">" +
+      "    <span title=\"cnx/syn:@PREMOD\">" +
+      "      <span title=\"mate/l:deutlich\">" +
+      "        <span title=\"mate/m:degree:pos\">" +
+      "          <span title=\"mate/p:ADJD\">" +
+      "            <span title=\"opennlp/p:ADJD\">ист</span>" +
+      "          </span>" +
+      "        </span>" +
+      "      </span>" +
+      "    </span>" +
+      "  </span>" +
+      "</span>" +
+      "<span title=\"cnx/l:deutlich\">" +
+      "  <span title=\"cnx/p:A\">" +
+      "    <span title=\"cnx/syn:@PREMOD\">" +
+      "      <span title=\"mate/l:deutlich\">" +
+      "        <span title=\"mate/m:degree:pos\">" +
+      "          <span title=\"mate/p:ADJD\">" +
+      "            <span title=\"opennlp/p:ADJD\">Беиспиел</span>" +
+      "          </span>" +
+      "        </span>" +
+      "      </span>" +
+      "    </span>" +
+      "  </span>" +
+      "</span>" +
+      "</mark>" +
+      ")" +
+      "<span class=\"cutted\"><\/span>" +
+      "</span>";
+
+          
   var treeSnippetHierarchy =
       "<span class=\"context-left\"><\/span><span class=\"match\"><span title=\"corenlp\/c:MPN\">Leonard Maltin<\/span> schrieb: „<span title=\"corenlp\/c:S\"><span title=\"corenlp\/c:NP\">Plot <span title=\"corenlp\/c:MPN\">contrivance isn‘<mark>t<\/mark> handled badly<\/span><\/span> <span title=\"corenlp\/c:PP\">in above-average programmer<\/span><\/span>“.&lt;<span title=\"corenlp\/c:S\"><span title=\"corenlp\/c:ROOT\"><span title=\"corenlp\/c:NP\">ref&gt;''<span title=\"corenlp\/c:NP\"><span title=\"corenlp\/c:CNP\">Movie &amp;amp; Video<\/span> Guide<\/span><\/span>'', <span title=\"corenlp\/c:VP\">1996 edition, <span title=\"corenlp\/c:NP\"><span title=\"corenlp\/c:CNP\">S. 210<\/span><\/span><\/span>.<\/span><\/span><\/span><span class=\"context-right\"><\/span>";
 
@@ -573,7 +621,7 @@
       expect(table.getValue(5, "cnx", "l")[0]).toBe("fähig");
       expect(table.getValue(5, "cnx", "l")[1]).toBe("leistung");
     });
-   
+    
     it('should be rendered async', function () {
       var e = table.element().firstChild;
       expect(e.nodeName).toBe('TABLE');
@@ -692,6 +740,105 @@
       expect(matchElement.tagName).toEqual('LI');
     });
     
+    it('should parse into a table with non-latin characters', function () {
+      var matchElement = matchElementFactory();
+      expect(matchElement.tagName).toEqual('LI');
+
+      // Match
+      expect(matchElement.children[0].tagName).toEqual('DIV');
+
+      // snippet
+      expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+      expect(matchElement.children[0].children[0].classList.contains('snippet')).toBeTruthy();
+      expect(matchElement.children[0].children[0].firstChild.nodeValue).toEqual('check');
+
+      // reference
+      expect(matchElement.children[1].classList.contains('ref')).toBeTruthy();
+      expect(matchElement.children[1].firstChild.nodeValue).toEqual('me');
+
+      // not yet
+      expect(matchElement.children[0].children[1]).toBe(undefined);
+
+      /*
+      var info = matchClass.create(matchElement).info();
+      info.showTable();
+      */
+      var matchObj = matchClass.create(matchElement);
+      matchObj.open();
+
+      // Match
+      expect(matchElement.children[0].tagName).toEqual('DIV');
+
+      // snippet
+      expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+
+      expect(matchElement.children[0].children[0].classList.contains('snippet')).toBeTruthy();
+
+      expect(matchElement.children[0].children[0].firstChild.nodeValue).toEqual('check');
+
+      // reference
+
+      expect(matchElement.children[2].classList.contains('ref')).toBeTruthy();
+      expect(matchElement.children[2].childNodes[1].nodeValue).toEqual('me');
+
+      // Add table
+      matchObj.panel.addTable();
+
+      // The match info is now the second child of the match element
+      var infotable = matchElement.children[1];
+      expect(infotable.tagName).toEqual('DIV');
+
+      expect(infotable.classList.contains('matchinfo')).toBeTruthy();
+    
+      expect(infotable.firstChild.firstChild.firstChild.classList.contains('matchtable')).toBeTruthy();
+
+      // expect(infotable.children[1].classList.contains('addtree')).toBeTruthy();
+    });
+
+    it('should parse into a table view with all non-latin characters (sync)', function () {
+
+      // Override getMatchInfo API call
+      KorAP.API.getMatchInfo = function (x, param, cb) {
+        cb({ "snippet": snippetDiffToken });
+      };
+
+      var matchElement = matchElementFactory();
+      expect(matchElement.tagName).toEqual('LI');
+
+      // Match
+      expect(matchElement.children[0].tagName).toEqual('DIV');
+
+      var matchObj = matchClass.create(matchElement);
+      matchObj.open();
+
+      // Match
+      expect(matchElement.children[0].tagName).toEqual('DIV');
+
+      // snippet
+      expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+
+      // Add table
+      matchObj.panel.addTable();
+
+      // The match info is now the second child of the match element
+      var infotable = matchElement.children[1];
+      expect(infotable.tagName).toEqual('DIV');
+
+      expect(infotable.classList.contains('matchinfo')).toBeTruthy();
+    
+      expect(infotable.firstChild.firstChild.firstChild.classList.contains('matchtable')).toBeTruthy();
+
+      let matchTable = infotable.firstChild.firstChild.firstChild;
+      expect(matchTable.querySelectorAll("th:not(.cutted).no-anno")[0].textContent).toEqual('Außerdem ');
+      expect(matchTable.getElementsByTagName('TH')[4].textContent).toEqual('Диес');
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[0].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[1].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[2].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[0].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[1].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[2].classList.contains('not-empty')).toBeTruthy();
+    });
+
   });
 
   describe('KorAP.RelationsView', function () {
diff --git a/dev/js/src/match/table.js b/dev/js/src/match/table.js
index d41fc7b..a8ccdc6 100644
--- a/dev/js/src/match/table.js
+++ b/dev/js/src/match/table.js
@@ -38,6 +38,7 @@
       
       t._pos     = 0;
       t._token   = [];
+      t._anno    = [];
       t._mark    = [];
       t._markE   = undefined;
       t._cutted  = [];
@@ -119,6 +120,7 @@
         // Create object on position unless it exists
         if (t._info[t._pos] === undefined) {
           t._info[t._pos] = {};
+          t._anno[t._pos] = false;
         };
 
         // Store at position in foundry/layer as array
@@ -168,6 +170,8 @@
             if (t._layer[layer] === undefined)
               t._layer[layer] = {};
             t._layer[layer][foundry] = 1;
+
+            t._anno[t._pos] = true;
           }
 
           // The current position marks a cut
@@ -184,7 +188,7 @@
         // Leaf node
         // store string on position and go to next string
         else if (c.nodeType === 3) {
-          if (c.nodeValue.match(/[a-z0-9\u25ae]/iu)) {
+          if (!c.nodeValue.match(/^\s+$/)) {
             t._mark[t._pos] = mark ? true : false;
             t._token[t._pos++] = c.nodeValue;
           };
@@ -274,6 +278,10 @@
           c.classList.add('cutted');
         };
 
+        if (!this._anno[i]) {
+          c.classList.add('no-anno');
+        };
+        
         // In case the title is very long - add a title attribute
         if (surface.length > 20) {
           c.setAttribute("title", surface)
@@ -313,6 +321,10 @@
             if (this._mark[v]) {
               cell.classList.add('mark');
             };
+
+            if (value === undefined && this._anno[v]) {
+              cell.classList.add("not-empty");
+            };
           };
         }, this);
       }, this);
diff --git a/dev/scss/main/view/matchtable.scss b/dev/scss/main/view/matchtable.scss
index ef01955..4bed238 100644
--- a/dev/scss/main/view/matchtable.scss
+++ b/dev/scss/main/view/matchtable.scss
@@ -37,7 +37,8 @@
   }
 
   td {
-    &:empty {
+    empty-cells: hide;
+    &.not-empty:empty {
       cursor: default;
       // Fix for empty annotation lines:
       &::after {
@@ -115,7 +116,10 @@
   }
 
   thead th {
-    background-color: $darker-orange;
+    &:not(.no-anno) {
+      background-color: $darker-orange;
+    }
+
     border-top-width: 0px !important;
     text-align:       center;
 
@@ -127,7 +131,7 @@
     &.mark {
       background-color: $darkest-orange;
     }
-
+    
     &.cutted {
       background-color: $light-orange;
 
diff --git a/lib/Kalamar.pm b/lib/Kalamar.pm
index 00f8242..36b2524 100644
--- a/lib/Kalamar.pm
+++ b/lib/Kalamar.pm
@@ -8,7 +8,7 @@
 use List::Util 'none';
 
 # Minor version - may be patched from package.json
-our $VERSION = '0.45';
+our $VERSION = '0.46';
 
 # Supported version of Backend API
 our $API_VERSION = '1.0';
diff --git a/package.json b/package.json
index 6583f6f..05a0377 100755
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
   "name": "Kalamar",
   "description": "Mojolicious-based Frontend for KorAP",
   "license": "BSD-2-Clause",
-  "version": "0.45.1",
+  "version": "0.46.0",
   "pluginVersion": "0.2.2",
   "engines": {
     "node": ">=6.0.0"