Improve token filtering in leaf nodes (fixes #168)
Change-Id: Ia53ebbe28c1f4ed861a48159c6bc88ad7dece3de
diff --git a/dev/js/spec/matchSpec.js b/dev/js/spec/matchSpec.js
index effe8d4..0d1615f 100644
--- a/dev/js/spec/matchSpec.js
+++ b/dev/js/spec/matchSpec.js
@@ -222,6 +222,54 @@
"</span>" +
"<span class=\"context-right\"></span>";
+ var snippetDiffToken = "<span class=\"context-left\"><\/span>"+
+ "<span class=\"match\">" +
+ "<span class=\"cutted\"><\/span>" +
+ "Außerdem " +
+ "<span title=\"cnx/l:meist\">" +
+ " <span title=\"cnx/p:ADV\">" +
+ " <span title=\"cnx/syn:@PREMOD\">" +
+ " <span title=\"mate/l:meist\">" +
+ " <span title=\"mate/p:ADV\">" +
+ " <span title=\"opennlp/p:ADV\">Диес</span>" +
+ " </span>" +
+ " </span>" +
+ " </span>" +
+ " </span>" +
+ "</span>" +
+ "<mark>" +
+ "<span title=\"cnx/l:deutlich\">" +
+ " <span title=\"cnx/p:A\">" +
+ " <span title=\"cnx/syn:@PREMOD\">" +
+ " <span title=\"mate/l:deutlich\">" +
+ " <span title=\"mate/m:degree:pos\">" +
+ " <span title=\"mate/p:ADJD\">" +
+ " <span title=\"opennlp/p:ADJD\">ист</span>" +
+ " </span>" +
+ " </span>" +
+ " </span>" +
+ " </span>" +
+ " </span>" +
+ "</span>" +
+ "<span title=\"cnx/l:deutlich\">" +
+ " <span title=\"cnx/p:A\">" +
+ " <span title=\"cnx/syn:@PREMOD\">" +
+ " <span title=\"mate/l:deutlich\">" +
+ " <span title=\"mate/m:degree:pos\">" +
+ " <span title=\"mate/p:ADJD\">" +
+ " <span title=\"opennlp/p:ADJD\">Беиспиел</span>" +
+ " </span>" +
+ " </span>" +
+ " </span>" +
+ " </span>" +
+ " </span>" +
+ "</span>" +
+ "</mark>" +
+ ")" +
+ "<span class=\"cutted\"><\/span>" +
+ "</span>";
+
+
var treeSnippetHierarchy =
"<span class=\"context-left\"><\/span><span class=\"match\"><span title=\"corenlp\/c:MPN\">Leonard Maltin<\/span> schrieb: „<span title=\"corenlp\/c:S\"><span title=\"corenlp\/c:NP\">Plot <span title=\"corenlp\/c:MPN\">contrivance isn‘<mark>t<\/mark> handled badly<\/span><\/span> <span title=\"corenlp\/c:PP\">in above-average programmer<\/span><\/span>“.<<span title=\"corenlp\/c:S\"><span title=\"corenlp\/c:ROOT\"><span title=\"corenlp\/c:NP\">ref>''<span title=\"corenlp\/c:NP\"><span title=\"corenlp\/c:CNP\">Movie &amp; Video<\/span> Guide<\/span><\/span>'', <span title=\"corenlp\/c:VP\">1996 edition, <span title=\"corenlp\/c:NP\"><span title=\"corenlp\/c:CNP\">S. 210<\/span><\/span><\/span>.<\/span><\/span><\/span><span class=\"context-right\"><\/span>";
@@ -573,7 +621,7 @@
expect(table.getValue(5, "cnx", "l")[0]).toBe("fähig");
expect(table.getValue(5, "cnx", "l")[1]).toBe("leistung");
});
-
+
it('should be rendered async', function () {
var e = table.element().firstChild;
expect(e.nodeName).toBe('TABLE');
@@ -692,6 +740,105 @@
expect(matchElement.tagName).toEqual('LI');
});
+ it('should parse into a table with non-latin characters', function () {
+ var matchElement = matchElementFactory();
+ expect(matchElement.tagName).toEqual('LI');
+ // NOTE(review): the assertions below only check DOM structure; none inspects non-latin text - confirm intent
+ // Match container: the first child of the LI is a DIV
+ expect(matchElement.children[0].tagName).toEqual('DIV');
+
+ // Snippet: first child of the match container, carrying the text 'check'
+ expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+ expect(matchElement.children[0].children[0].classList.contains('snippet')).toBeTruthy();
+ expect(matchElement.children[0].children[0].firstChild.nodeValue).toEqual('check');
+
+ // Reference: before opening, it is the second child of the LI and starts with the text 'me'
+ expect(matchElement.children[1].classList.contains('ref')).toBeTruthy();
+ expect(matchElement.children[1].firstChild.nodeValue).toEqual('me');
+
+ // Before opening, the match container has no second child
+ expect(matchElement.children[0].children[1]).toBe(undefined);
+
+ /*
+ var info = matchClass.create(matchElement).info();
+ info.showTable();
+ */
+ var matchObj = matchClass.create(matchElement);
+ matchObj.open();
+
+ // After opening, the match container is still a DIV
+ expect(matchElement.children[0].tagName).toEqual('DIV');
+
+ // After opening, the snippet is unchanged: same position, class and text
+ expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+
+ expect(matchElement.children[0].children[0].classList.contains('snippet')).toBeTruthy();
+
+ expect(matchElement.children[0].children[0].firstChild.nodeValue).toEqual('check');
+
+ // After opening, the reference is the third child of the LI and 'me' is its second child node
+
+ expect(matchElement.children[2].classList.contains('ref')).toBeTruthy();
+ expect(matchElement.children[2].childNodes[1].nodeValue).toEqual('me');
+
+ // Add the table through the match panel
+ matchObj.panel.addTable();
+
+ // The second child of the LI is now a DIV with class 'matchinfo'
+ var infotable = matchElement.children[1];
+ expect(infotable.tagName).toEqual('DIV');
+
+ expect(infotable.classList.contains('matchinfo')).toBeTruthy();
+
+ expect(infotable.firstChild.firstChild.firstChild.classList.contains('matchtable')).toBeTruthy();
+
+ // expect(infotable.children[1].classList.contains('addtree')).toBeTruthy();
+ });
+
+ it('should parse into a table view with all non-latin characters (sync)', function () {
+ // Renders snippetDiffToken as a match table and checks the header cells and the 'not-empty' cell flags
+ // Stub getMatchInfo so the callback fires synchronously with the fixture; NOTE(review): the stub is not restored in this spec
+ KorAP.API.getMatchInfo = function (x, param, cb) {
+ cb({ "snippet": snippetDiffToken });
+ };
+
+ var matchElement = matchElementFactory();
+ expect(matchElement.tagName).toEqual('LI');
+
+ // Match container: the first child of the LI is a DIV
+ expect(matchElement.children[0].tagName).toEqual('DIV');
+
+ var matchObj = matchClass.create(matchElement);
+ matchObj.open();
+
+ // After opening, the match container is still a DIV
+ expect(matchElement.children[0].tagName).toEqual('DIV');
+
+ // Snippet: the first child of the match container is a DIV
+ expect(matchElement.children[0].children[0].tagName).toEqual('DIV');
+
+ // Add the table through the match panel
+ matchObj.panel.addTable();
+
+ // The second child of the LI is now a DIV with class 'matchinfo'
+ var infotable = matchElement.children[1];
+ expect(infotable.tagName).toEqual('DIV');
+
+ expect(infotable.classList.contains('matchinfo')).toBeTruthy();
+
+ expect(infotable.firstChild.firstChild.firstChild.classList.contains('matchtable')).toBeTruthy();
+ // Header cells: the first non-cutted 'no-anno' TH holds 'Außerdem ', and the TH at index 4 holds the first Cyrillic token
+ let matchTable = infotable.firstChild.firstChild.firstChild;
+ expect(matchTable.querySelectorAll("th:not(.cutted).no-anno")[0].textContent).toEqual('Außerdem ');
+ expect(matchTable.getElementsByTagName('TH')[4].textContent).toEqual('Диес');
+ expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[0].classList.contains('not-empty')).toBeFalsy(); // body row 1: none of the first three TDs is flagged
+ expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[1].classList.contains('not-empty')).toBeFalsy();
+ expect(matchTable.querySelectorAll("tbody tr:nth-child(1) td")[2].classList.contains('not-empty')).toBeFalsy();
+ expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[0].classList.contains('not-empty')).toBeFalsy(); // body row 5: only the third TD is flagged
+ expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[1].classList.contains('not-empty')).toBeFalsy();
+ expect(matchTable.querySelectorAll("tbody tr:nth-child(5) td")[2].classList.contains('not-empty')).toBeTruthy();
+ });
+
});
describe('KorAP.RelationsView', function () {
diff --git a/dev/js/src/match/table.js b/dev/js/src/match/table.js
index d41fc7b..a8ccdc6 100644
--- a/dev/js/src/match/table.js
+++ b/dev/js/src/match/table.js
@@ -38,6 +38,7 @@
t._pos = 0;
t._token = [];
+ t._anno = [];
t._mark = [];
t._markE = undefined;
t._cutted = [];
@@ -119,6 +120,7 @@
// Create object on position unless it exists
if (t._info[t._pos] === undefined) {
t._info[t._pos] = {};
+ t._anno[t._pos] = false;
};
// Store at position in foundry/layer as array
@@ -168,6 +170,8 @@
if (t._layer[layer] === undefined)
t._layer[layer] = {};
t._layer[layer][foundry] = 1;
+
+ t._anno[t._pos] = true;
}
// The current position marks a cut
@@ -184,7 +188,7 @@
// Leaf node
// store string on position and go to next string
else if (c.nodeType === 3) {
- if (c.nodeValue.match(/[a-z0-9\u25ae]/iu)) {
+ if (!c.nodeValue.match(/^\s+$/)) {
t._mark[t._pos] = mark ? true : false;
t._token[t._pos++] = c.nodeValue;
};
@@ -274,6 +278,10 @@
c.classList.add('cutted');
};
+ if (!this._anno[i]) {
+ c.classList.add('no-anno');
+ };
+
// In case the title is very long - add a title attribute
if (surface.length > 20) {
c.setAttribute("title", surface)
@@ -313,6 +321,10 @@
if (this._mark[v]) {
cell.classList.add('mark');
};
+
+ if (value === undefined && this._anno[v]) {
+ cell.classList.add("not-empty");
+ };
};
}, this);
}, this);