Always tag as ADR if pattern matches
Change-Id: I7f8636eb3d9e8b03fad2e8747d50d4cb208bfc43
diff --git a/src/index.js b/src/index.js
index 0c3faf1..148ff52 100755
--- a/src/index.js
+++ b/src/index.js
@@ -147,7 +147,7 @@
new_tag = 'URL';
} else if (word.match(emailRegex)) {
new_tag = 'EML';
- } else if (!columns[3].match("^(NE|PROPN)") && word.match(addressRegex)) {
+ } else if (word.match(addressRegex)) {
new_tag = 'ADR';
}
if (new_tag) {
diff --git a/test/test.js b/test/test.js
index 0f670e7..aeaaf10 100644
--- a/test/test.js
+++ b/test/test.js
@@ -64,6 +64,25 @@
done();
});
+ test('Regression test for addresses: emit ADR regardless of existing POS values', (done) => {
+   // Both rows carry PROPN / NE in column index 3; such rows were previously excluded from ADR tagging.
+   const testInput = [
+     '# foundry = base',
+     '# text_id = test-address',
+     '# text = @handle @markup',
+     ['1', '@handle', '_', 'PROPN', '_', '_', '_', '_', '_', '_'].join('\t'),
+     ['2', '@markup', '_', 'NE', '_', '_', '_', '_', '_', '_'].join('\t'),
+     ''
+   ].join('\n');
+   const stdout = execSync('node src/index.js', { input: testInput }).toString();
+   expect(stdout).toContain('@handle\t_\tPROPN\tADR');
+   expect(stdout).toContain('@markup\t_\tNE\tADR');
+   // Exactly two ADR cells in the whole output: one per input token, nothing else tagged.
+   const adrCount = (stdout.match(/\tADR\t/g) || []).length;
+   expect(adrCount).toBe(2);
+   done();
+ });
+
test('Regression test for issue #113: emoji modifiers and ZWJ', (done) => {
// Test that compound emojis with modifiers and ZWJ are recognized as single EMOIMG tokens
const testInput = `# foundry = base