Improve token filtering in leaf nodes (fixes #168)

Change-Id: Ia53ebbe28c1f4ed861a48159c6bc88ad7dece3de
diff --git a/dev/js/spec/matchSpec.js b/dev/js/spec/matchSpec.js
index effe8d4..0d1615f 100644
--- a/dev/js/spec/matchSpec.js
+++ b/dev/js/spec/matchSpec.js
@@ -222,6 +222,54 @@
       "</span>" +
       "<span class=\"context-right\"></span>";
 
+  var snippetDiffToken = "<span class=\"context-left\"><\/span>"+
+      "<span class=\"match\">" +
+      "<span class=\"cutted\"><\/span>" +
+      "Außerdem " +
+      "<span title=\"cnx/l:meist\">" +
+      "  <span title=\"cnx/p:ADV\">" +
+      "    <span title=\"cnx/syn:@PREMOD\">" +
+      "      <span title=\"mate/l:meist\">" +
+      "        <span title=\"mate/p:ADV\">" +
+      "          <span title=\"opennlp/p:ADV\">Диес</span>" +
+      "        </span>" +
+      "      </span>" +
+      "    </span>" +
+      "  </span>" +
+      "</span>" +
+      "<mark>" + 
+      "<span title=\"cnx/l:deutlich\">" +
+      "  <span title=\"cnx/p:A\">" +
+      "    <span title=\"cnx/syn:@PREMOD\">" +
+      "      <span title=\"mate/l:deutlich\">" +
+      "        <span title=\"mate/m:degree:pos\">" +
+      "          <span title=\"mate/p:ADJD\">" +
+      "            <span title=\"opennlp/p:ADJD\">ист</span>" +
+      "          </span>" +
+      "        </span>" +
+      "      </span>" +
+      "    </span>" +
+      "  </span>" +
+      "</span>" +
+      "<span title=\"cnx/l:deutlich\">" +
+      "  <span title=\"cnx/p:A\">" +
+      "    <span title=\"cnx/syn:@PREMOD\">" +
+      "      <span title=\"mate/l:deutlich\">" +
+      "        <span title=\"mate/m:degree:pos\">" +
+      "          <span title=\"mate/p:ADJD\">" +
+      "            <span title=\"opennlp/p:ADJD\">Беиспиел</span>" +
+      "          </span>" +
+      "        </span>" +
+      "      </span>" +
+      "    </span>" +
+      "  </span>" +
+      "</span>" +
+      "</mark>" +
+      ")" +
+      "<span class=\"cutted\"><\/span>" +
+      "</span>";
+  // snippetDiffToken (above): match snippet with a plain-text token ('Außerdem '), three Cyrillic tokens nested in title spans, and a bare ')' outside any title span
+          
   var treeSnippetHierarchy =
       "<span class=\"context-left\"><\/span><span class=\"match\"><span title=\"corenlp\/c:MPN\">Leonard Maltin<\/span> schrieb: „<span title=\"corenlp\/c:S\"><span title=\"corenlp\/c:NP\">Plot <span title=\"corenlp\/c:MPN\">contrivance isn‘<mark>t<\/mark> handled badly<\/span><\/span> <span title=\"corenlp\/c:PP\">in above-average programmer<\/span><\/span>“.&lt;<span title=\"corenlp\/c:S\"><span title=\"corenlp\/c:ROOT\"><span title=\"corenlp\/c:NP\">ref&gt;''<span title=\"corenlp\/c:NP\"><span title=\"corenlp\/c:CNP\">Movie &amp;amp; Video<\/span> Guide<\/span><\/span>'', <span title=\"corenlp\/c:VP\">1996 edition, <span title=\"corenlp\/c:NP\"><span title=\"corenlp\/c:CNP\">S. 210<\/span><\/span><\/span>.<\/span><\/span><\/span><span class=\"context-right\"><\/span>";
 
@@ -573,7 +621,7 @@
       expect(table.getValue(5, "cnx", "l")[0]).toBe("fähig");
       expect(table.getValue(5, "cnx", "l")[1]).toBe("leistung");
     });
-   
+    
     it('should be rendered async', function () {
       var e = table.element().firstChild;
       expect(e.nodeName).toBe('TABLE');
@@ -692,6 +740,105 @@
       expect(matchElement.tagName).toEqual('LI');
     });
     
+    it('should parse into a table with non-latin characters', function () {
+      var matchElement = matchElementFactory();
+      expect(matchElement.tagName).toEqual('LI');
+      // NOTE(review): no non-latin fixture is referenced inside this test - confirm the title matches what is exercised
+      // Match: the first child of the LI is a DIV
+      expect(matchElement.children[0].tagName).toEqual('DIV');
+
+      // Snippet: nested DIV with class 'snippet' whose first child node holds the text 'check'
+      expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+      expect(matchElement.children[0].children[0].classList.contains('snippet')).toBeTruthy();
+      expect(matchElement.children[0].children[0].firstChild.nodeValue).toEqual('check');
+
+      // Reference: before opening, the second child of the LI has class 'ref' and its first child node is 'me'
+      expect(matchElement.children[1].classList.contains('ref')).toBeTruthy();
+      expect(matchElement.children[1].firstChild.nodeValue).toEqual('me');
+
+      // Before open(): the match DIV has no second child element yet
+      expect(matchElement.children[0].children[1]).toBe(undefined);
+
+      /*
+      var info = matchClass.create(matchElement).info();
+      info.showTable();
+      */
+      var matchObj = matchClass.create(matchElement);
+      matchObj.open();
+
+      // After open(): the first child of the LI is still a DIV
+      expect(matchElement.children[0].tagName).toEqual('DIV');
+
+      // Snippet: same tag, class and text as before open()
+      expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+
+      expect(matchElement.children[0].children[0].classList.contains('snippet')).toBeTruthy();
+
+      expect(matchElement.children[0].children[0].firstChild.nodeValue).toEqual('check');
+
+      // Reference: after open() it is the third child of the LI and 'me' is its second child node
+
+      expect(matchElement.children[2].classList.contains('ref')).toBeTruthy();
+      expect(matchElement.children[2].childNodes[1].nodeValue).toEqual('me');
+
+      // Add the table view through the match panel
+      matchObj.panel.addTable();
+
+      // The second child of the LI is expected to be the 'matchinfo' DIV wrapping the 'matchtable' element
+      var infotable = matchElement.children[1];
+      expect(infotable.tagName).toEqual('DIV');
+
+      expect(infotable.classList.contains('matchinfo')).toBeTruthy();
+    
+      expect(infotable.firstChild.firstChild.firstChild.classList.contains('matchtable')).toBeTruthy();
+
+      // expect(infotable.children[1].classList.contains('addtree')).toBeTruthy();
+    });
+
+    it('should parse into a table view with all non-latin characters (sync)', function () {
+      // Renders snippetDiffToken through the table view and checks header texts and the 'not-empty' cell class
+      // Stub the getMatchInfo API call: the callback is invoked immediately with snippetDiffToken as the snippet
+      KorAP.API.getMatchInfo = function (x, param, cb) {
+        cb({ "snippet": snippetDiffToken });
+      };
+      // NOTE(review): the stub is not restored inside this test - confirm later specs do not depend on the previous getMatchInfo
+      var matchElement = matchElementFactory();
+      expect(matchElement.tagName).toEqual('LI');
+
+      // Match: the first child of the LI is a DIV
+      expect(matchElement.children[0].tagName).toEqual('DIV');
+
+      var matchObj = matchClass.create(matchElement);
+      matchObj.open();
+
+      // After open(): the first child of the LI is still a DIV
+      expect(matchElement.children[0].tagName).toEqual('DIV');
+
+      // Snippet: first child of the match DIV is a DIV
+      expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+
+      // Add the table view through the match panel (the stubbed getMatchInfo supplies the snippet)
+      matchObj.panel.addTable();
+
+      // The second child of the LI is expected to be the 'matchinfo' DIV wrapping the 'matchtable' element
+      var infotable = matchElement.children[1];
+      expect(infotable.tagName).toEqual('DIV');
+
+      expect(infotable.classList.contains('matchinfo')).toBeTruthy();
+    
+      expect(infotable.firstChild.firstChild.firstChild.classList.contains('matchtable')).toBeTruthy();
+      // Headers: 'Außerdem ' is the first non-cutted 'no-anno' TH, 'Диес' is the TH at index 4; then 'not-empty' is checked on rows 1 and 5
+      let matchTable = infotable.firstChild.firstChild.firstChild;
+      expect(matchTable.querySelectorAll("th:not(.cutted).no-anno")[0].textContent).toEqual('Außerdem ');
+      expect(matchTable.getElementsByTagName('TH')[4].textContent).toEqual('Диес');
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[0].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[1].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[2].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[0].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[1].classList.contains('not-empty')).toBeFalsy();
+      expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[2].classList.contains('not-empty')).toBeTruthy();
+    });
+
   });
 
   describe('KorAP.RelationsView', function () {
diff --git a/dev/js/src/match/table.js b/dev/js/src/match/table.js
index d41fc7b..a8ccdc6 100644
--- a/dev/js/src/match/table.js
+++ b/dev/js/src/match/table.js
@@ -38,6 +38,7 @@
       
       t._pos     = 0;
       t._token   = [];
+      t._anno    = [];
       t._mark    = [];
       t._markE   = undefined;
       t._cutted  = [];
@@ -119,6 +120,7 @@
         // Create object on position unless it exists
         if (t._info[t._pos] === undefined) {
           t._info[t._pos] = {};
+          t._anno[t._pos] = false;
         };
 
         // Store at position in foundry/layer as array
@@ -168,6 +170,8 @@
             if (t._layer[layer] === undefined)
               t._layer[layer] = {};
             t._layer[layer][foundry] = 1;
+
+            t._anno[t._pos] = true;
           }
 
           // The current position marks a cut
@@ -184,7 +188,7 @@
         // Leaf node
         // store string on position and go to next string
         else if (c.nodeType === 3) {
-          if (c.nodeValue.match(/[a-z0-9\u25ae]/iu)) {
+          if (!c.nodeValue.match(/^\s+$/)) {
             t._mark[t._pos] = mark ? true : false;
             t._token[t._pos++] = c.nodeValue;
           };
@@ -274,6 +278,10 @@
           c.classList.add('cutted');
         };
 
+        if (!this._anno[i]) {
+          c.classList.add('no-anno');
+        };
+        
         // In case the title is very long - add a title attribute
         if (surface.length > 20) {
           c.setAttribute("title", surface)
@@ -313,6 +321,10 @@
             if (this._mark[v]) {
               cell.classList.add('mark');
             };
+
+            if (value === undefined && this._anno[v]) {
+              cell.classList.add("not-empty");
+            };
           };
         }, this);
       }, this);