Let HST require at least one letter

(as in Mastodon)

Change-Id: I2be91a30a6803933dd64d6a789fc28501ae18632
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 20d0894..e25a8dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@
 ### Changed
 
 - `ADR` is now emitted whenever a token matches the `@`-address pattern, regardless of existing POS values in the input.
+- Purely numeric tokens such as `#10` are no longer tagged as `HST`; hashtags must contain at least one letter.
 - Documentation examples were revised and anonymized for public release.
 
 ## 1.0.0
diff --git a/Readme.md b/Readme.md
index e4cb61b..1fc5c46 100644
--- a/Readme.md
+++ b/Readme.md
@@ -26,11 +26,13 @@
 | `EMOIMG` | Unicode emoji tokens | `😂`, `😇` | Writes `EMOIMG` to XPOS, normalizes LEMMA to the base emoji, and adds FEATS metadata |
 | `AKW` | Action words / inflectives | `:grins:` | Writes `AKW` to XPOS |
 | `EMOASC` | ASCII emoticons | `:)`, `<3` | Writes `EMOASC` to XPOS |
-| `HST` | Hashtags | `#KorAP`, `#10` | Writes `HST` to XPOS |
+| `HST` | Hashtags | `#KorAP`, `#3D` | Writes `HST` to XPOS when the hashtag contains at least one letter |
 | `URL` | URLs | `https://korap.ids-mannheim.de` | Writes `URL` to XPOS |
 | `EML` | Email addresses | `mail@example.org` | Writes `EML` to XPOS |
 | `ADR` | `@`-names / addresses | `@markup` | Writes `ADR` to XPOS |
 
+Numeric-only forms such as `#10` are not tagged as `HST`.
+
 ## CoNLL-U Output Examples
 
 The following example shows how the different tags appear in CoNLL-U output. In all cases, the annotation is written to XPOS; only `EMOIMG` additionally changes LEMMA and FEATS.
diff --git a/src/index.js b/src/index.js
index 148ff52..ce45a04 100755
--- a/src/index.js
+++ b/src/index.js
@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 
 const emoticonRegex = /^(\:\w+\:|\<[\/\\]?3|[\(\)\\\D|\*\$][\-\^]?[\:\;\=]|[\:\;\=B8][\-\^]?[3DOPp\@\$\*\\\)\(\/\|])(?=\s|[\!\.\?]|$)/;
-const hashtagRegex = /^#[a-zA-Z0-9]+/;
+const hashtagRegex = /^#[a-zA-Z0-9]*[a-zA-Z][a-zA-Z0-9]*$/;
 const urlRegex = /^(ftp|http)s?:\/\/[^\s]+/;
 const emailRegex = /^\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/;
 const addressRegex = /^@[a-zA-Z0-9]+/;
diff --git a/test/test.js b/test/test.js
index aeaaf10..9eea567 100644
--- a/test/test.js
+++ b/test/test.js
@@ -12,7 +12,7 @@
     var ascimg_count = (stdout.match(/EMOASC/g) || []).length;
     expect(ascimg_count).toBe(30);
     var hst_count = (stdout.match(/\tHST\t/g) || []).length;
-    expect(hst_count).toBe(14);
+    expect(hst_count).toBe(12);
     var url_count = (stdout.match(/\tURL\t/g) || []).length;
     expect(url_count).toBe(4);
     var adr_count = (stdout.match(/\tADR\t/g) || []).length;
@@ -22,7 +22,7 @@
     var eof_count = (stdout.match(/\n# eof/g) || []).length;
     expect(eof_count).toBe(1);
     var lines_count = (stdout.split("\n")).length;
-    expect(lines_count).toBe(810);
+    expect(lines_count).toBe(803);
     done();
   });
 
@@ -35,7 +35,7 @@
     var ascimg_count = (stdout.match(/EMOASC/g) || []).length;
     expect(ascimg_count).toBe(30);
     var hst_count = (stdout.match(/\tHST\t/g) || []).length;
-    expect(hst_count).toBe(14);
+    expect(hst_count).toBe(12);
     var url_count = (stdout.match(/\tURL\t/g) || []).length;
     expect(url_count).toBe(4);
     var adr_count = (stdout.match(/\tADR\t/g) || []).length;
@@ -49,15 +49,17 @@
     const testInput = [
       '# foundry = base',
       '# text_id = test-hashtag',
-      '# text = #KorAP #10',
+      '# text = #KorAP #3D #10',
       ['1', '#KorAP', '_', '_', '_', '_', '_', '_', '_', '_'].join('\t'),
-      ['2', '#10', '_', '_', '_', '_', '_', '_', '_', '_'].join('\t'),
+      ['2', '#3D', '_', '_', '_', '_', '_', '_', '_', '_'].join('\t'),
+      ['3', '#10', '_', '_', '_', '_', '_', '_', '_', '_'].join('\t'),
       ''
     ].join('\n');
     const stdout = execSync('node src/index.js', { input: testInput }).toString();
 
     expect(stdout).toContain('#KorAP\t_\t_\tHST');
-    expect(stdout).toContain('#10\t_\t_\tHST');
+    expect(stdout).toContain('#3D\t_\t_\tHST');
+    expect(stdout).not.toContain('#10\t_\t_\tHST');
 
     var hst_count = (stdout.match(/\tHST\t/g) || []).length;
     expect(hst_count).toBe(2);