/**
 * Used to compose unicode character classes.
 *
 * Every `rs*` value in this file is regex *source text* (a string), while
 * `re*` is reserved for compiled `RegExp` objects. The `*Range` strings are
 * only valid inside a `[...]` character class.
 */
var rsAstralRange = '\\ud800-\\udfff',          // every UTF-16 surrogate code unit
    rsComboMarksRange = '\\u0300-\\u036f',      // combining diacritical marks
    rsComboHalfMarksRange = '\\ufe20-\\ufe2f',  // combining half marks
    rsComboSymbolsRange = '\\u20d0-\\u20ff',    // combining marks for symbols
    rsComboRange = rsComboMarksRange + rsComboHalfMarksRange + rsComboSymbolsRange,
    rsVarRange = '\\ufe0e\\ufe0f';              // variation selectors 15 and 16

/** Used to compose unicode capture groups. */
var rsAstral = '[' + rsAstralRange + ']',
    rsCombo = '[' + rsComboRange + ']',
    // Surrogate pairs for U+1F3FB..U+1F3FF (emoji skin-tone modifiers).
    rsFitz = '\\ud83c[\\udffb-\\udfff]',
    rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')',
    rsNonAstral = '[^' + rsAstralRange + ']',
    // Two consecutive regional indicators (U+1F1E6..U+1F1FF), i.e. one flag.
    rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}',
    rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]',
    rsZWJ = '\\u200d';

/** Used to compose unicode regexes. */
var rsOptMod = rsModifier + '?',
    rsOptVar = '[' + rsVarRange + ']?',
    // Zero or more "ZWJ + symbol" continuations, each with its own optional
    // variation selector and modifier.
    rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + rsOptMod + ')*',
    rsSeq = rsOptVar + rsOptMod + rsOptJoin,
    // Alternation order matters: a well-formed surrogate pair is tried before
    // the lone-surrogate fallback (`rsAstral`).
    rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')';

/**
 * Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode).
 *
 * The first alternative matches a skin-tone modifier on its own when it is
 * directly followed by another skin-tone modifier; otherwise a base symbol is
 * matched together with its trailing variation selector, modifier and
 * ZWJ-joined continuations.
 */
var reUnicode = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g');
| 28 | |
/**
 * Converts a Unicode `string` to an array.
 *
 * The string is split by collecting every match of the module-level global
 * regex `reUnicode`, so each array entry is one matched substring.
 *
 * @private
 * @param {string} string The string to convert.
 * @returns {Array} Returns the converted array; an empty array when nothing
 *  matches (for example, for an empty string).
 */
function unicodeToArray(string) {
  // With a global regex, `match` returns `null` rather than `[]` when there
  // are no matches, so normalize that case for callers.
  return string.match(reUnicode) || [];
}
| 39 | |
// Expose the helper as this CommonJS module's sole export.
module.exports = unicodeToArray;