Blame - node_modules/iconv-lite/encodings/utf16.js - KorAP/Kalamar - Gitiles

blob: 54765aeee2f11ec423c0b719cd424bed876d6402 [file] [log] [blame]

Leo Repp	58b9f11	2021-11-22 11:57:47 +0100	[diff] [blame^]	1	"use strict";
				2	var Buffer = require("safer-buffer").Buffer;
				3
				4	// Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
				5
				6	// == UTF16-BE codec. ==========================================================
				7
				/**
				 * UTF-16BE codec descriptor.
				 *
				 * The constructor stores nothing. The prototype exposes the encoder and
				 * decoder constructors and sets the `bomAware` flag to true.
				 */
				function Utf16BECodec() {
				}

				Utf16BECodec.prototype.encoder = Utf16BEEncoder;
				Utf16BECodec.prototype.decoder = Utf16BEDecoder;
				Utf16BECodec.prototype.bomAware = true;

				// Function declarations are hoisted, so exporting after the wiring is equivalent.
				exports.utf16be = Utf16BECodec;
				15
				16
				17	// -- Encoding
				18
				/**
				 * UTF-16BE encoder. Keeps no state between writes.
				 */
				function Utf16BEEncoder() {
				}

				/**
				 * Encodes one chunk of text as big-endian UTF-16.
				 *
				 * @param {string} str - Text to encode.
				 * @returns {Buffer} Two bytes per UTF-16 code unit, high byte first.
				 */
				Utf16BEEncoder.prototype.write = function(str) {
					// 'ucs2' produces little-endian pairs; flip each pair in place to get big-endian.
					var bytes = Buffer.from(str, 'ucs2');
					var pos = 0;
					while (pos < bytes.length) {
						var lowByte = bytes[pos];
						bytes[pos] = bytes[pos + 1];
						bytes[pos + 1] = lowByte;
						pos += 2;
					}
					return bytes;
				};

				/**
				 * Finishes encoding. Nothing is buffered by write(), so nothing is returned.
				 *
				 * @returns {undefined}
				 */
				Utf16BEEncoder.prototype.end = function() {
				};
				32
				33
				34	// -- Decoding
				35
				/**
				 * Streaming UTF-16BE decoder.
				 *
				 * `overflowByte` holds the first (high) byte of a code unit that was split
				 * across two chunks, or -1 when no byte is pending.
				 */
				function Utf16BEDecoder() {
					this.overflowByte = -1;
				}

				/**
				 * Decodes one chunk of big-endian UTF-16 bytes.
				 *
				 * @param {Buffer} buf - Input bytes; may end in the middle of a code unit.
				 * @returns {string} Text for every complete code unit seen so far. A trailing
				 *     odd byte is kept in `overflowByte` for the next call.
				 */
				Utf16BEDecoder.prototype.write = function(buf) {
					if (buf.length === 0)
						return '';

					// One spare byte: a pending overflow byte can make the output one byte
					// longer than the input.
					var swapped = Buffer.alloc(buf.length + 1);
					var i = 0, j = 0;

					if (this.overflowByte !== -1) {
						// Complete the code unit left over from the previous chunk (low byte first).
						swapped[0] = buf[0];
						swapped[1] = this.overflowByte;
						i = 1; j = 2;
					}

					// Swap each remaining byte pair into little-endian order.
					for (; i < buf.length - 1; i += 2, j += 2) {
						swapped[j] = buf[i + 1];
						swapped[j + 1] = buf[i];
					}

					// Exactly one unread byte left means the chunk ended mid code unit.
					this.overflowByte = (i === buf.length - 1) ? buf[buf.length - 1] : -1;

					return swapped.slice(0, j).toString('ucs2');
				};

				/**
				 * Finishes decoding. A dangling odd byte is discarded and the state is reset,
				 * so a truncated stream cannot corrupt a later write() on the same instance.
				 *
				 * @returns {undefined}
				 */
				Utf16BEDecoder.prototype.end = function() {
					this.overflowByte = -1;
				};
				65
				66
				67	// == UTF-16 codec =============================================================
				68	// Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
				69	// Defaults to UTF-16LE, as it's prevalent and default in Node.
				70	// http://en.wikipedia.org/wiki/UTF-16 and http://encoding.spec.whatwg.org/#utf-16le
				71	// Decoder default can be changed: iconv.decode(buf, 'utf16', {defaultEncoding: 'utf-16be'});
				72
				73	// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
				74
				/**
				 * Generic UTF-16 codec descriptor.
				 *
				 * @param {Object} codecOptions - Not read by this constructor.
				 * @param {Object} iconv - Stored on the instance as `iconv`.
				 */
				function Utf16Codec(codecOptions, iconv) {
					this.iconv = iconv;
				}

				Utf16Codec.prototype.encoder = Utf16Encoder;
				Utf16Codec.prototype.decoder = Utf16Decoder;

				// Function declarations are hoisted, so exporting after the wiring is equivalent.
				exports.utf16 = Utf16Codec;
				82
				83
				84	// -- Encoding (pass-through)
				85
				/**
				 * UTF-16 encoder: a pass-through to the 'utf-16le' encoder obtained from
				 * `codec.iconv.getEncoder`, with `addBOM` defaulting to true.
				 *
				 * @param {Object} [options] - Encoder options. Not modified; `addBOM` is only
				 *     defaulted on an internal copy when it is undefined.
				 * @param {Object} codec - Codec instance; its `iconv.getEncoder` is called.
				 */
				function Utf16Encoder(options, codec) {
					// Work on a copy so the caller's object does not gain an `addBOM` key as a
					// side effect (it may be reused for other encodings).
					// for...in also carries over inherited enumerable options.
					var encoderOptions = {};
					for (var key in options)
						encoderOptions[key] = options[key];

					if (encoderOptions.addBOM === undefined)
						encoderOptions.addBOM = true;

					this.encoder = codec.iconv.getEncoder('utf-16le', encoderOptions);
				}

				/**
				 * Encodes one chunk by delegating to the wrapped encoder.
				 *
				 * @param {string} str - Text to encode.
				 * @returns {*} Whatever the wrapped encoder's write() returns.
				 */
				Utf16Encoder.prototype.write = function(str) {
					return this.encoder.write(str);
				};

				/**
				 * Finishes encoding by delegating to the wrapped encoder.
				 *
				 * @returns {*} Whatever the wrapped encoder's end() returns.
				 */
				Utf16Encoder.prototype.end = function() {
					return this.encoder.end();
				};
				100
				101
				102	// -- Decoding
				103
				/**
				 * UTF-16 decoder that picks its endianness from the input.
				 *
				 * Input is accumulated in `initialBytes` until the concrete decoder has been
				 * chosen; after that every call is forwarded to `decoder`.
				 *
				 * @param {Object} [options] - Decoder options; `defaultEncoding` is passed to
				 *     detectEncoding and the whole object to `iconv.getDecoder`.
				 * @param {Object} codec - Codec instance; its `iconv` is kept for getDecoder.
				 */
				function Utf16Decoder(options, codec) {
					this.decoder = null;
					this.initialBytes = [];
					this.initialBytesLen = 0;

					this.options = options || {};
					this.iconv = codec.iconv;
				}

				/**
				 * Decodes one chunk.
				 *
				 * @param {Buffer} buf - Input bytes.
				 * @returns {string} '' while fewer than 16 bytes have been collected and no
				 *     decoder is chosen yet; otherwise the chosen decoder's output.
				 */
				Utf16Decoder.prototype.write = function(buf) {
					// Encoding already chosen: plain pass-through.
					if (this.decoder)
						return this.decoder.write(buf);

					// Not chosen yet: keep collecting until there is enough for the heuristic.
					this.initialBytes.push(buf);
					this.initialBytesLen += buf.length;
					if (this.initialBytesLen < 16)
						return '';

					// Enough bytes: detect endianness and replay everything collected so far.
					var collected = Buffer.concat(this.initialBytes);
					var encoding = detectEncoding(collected, this.options.defaultEncoding);
					this.decoder = this.iconv.getDecoder(encoding, this.options);
					this.initialBytesLen = 0;
					this.initialBytes.length = 0;

					return this.decoder.write(collected);
				};

				/**
				 * Finishes decoding.
				 *
				 * If no decoder was chosen yet, the encoding is detected from whatever bytes
				 * were collected, and those bytes are decoded and flushed here.
				 *
				 * @returns {*} The chosen decoder's end() result, preceded by the decoded
				 *     collected bytes when detection happens in this call.
				 */
				Utf16Decoder.prototype.end = function() {
					if (this.decoder)
						return this.decoder.end();

					var collected = Buffer.concat(this.initialBytes);
					var encoding = detectEncoding(collected, this.options.defaultEncoding);
					this.decoder = this.iconv.getDecoder(encoding, this.options);

					var text = this.decoder.write(collected);
					var tail = this.decoder.end();

					return tail ? (text + tail) : text;
				};
				145
				/**
				 * Guesses whether a byte sequence is UTF-16LE or UTF-16BE.
				 *
				 * @param {Buffer} buf - Initial bytes of the input.
				 * @param {string} [defaultEncoding] - Returned when nothing can be decided;
				 *     falls back to 'utf-16le' when falsy.
				 * @returns {string} 'utf-16be' or 'utf-16le' when a BOM or the heuristic
				 *     decides, otherwise the default.
				 */
				function detectEncoding(buf, defaultEncoding) {
					var fallback = defaultEncoding || 'utf-16le';

					// Fewer than two bytes: nothing to inspect.
					if (buf.length < 2)
						return fallback;

					// A BOM settles it.
					if (buf[0] === 0xFE && buf[1] === 0xFF) // UTF-16BE BOM
						return 'utf-16be';
					if (buf[0] === 0xFF && buf[1] === 0xFE) // UTF-16LE BOM
						return 'utf-16le';

					// No BOM found. Try to deduce the encoding from the initial content.
					// Most of the time, the content has ASCII chars (U+00**), but the opposite
					// (U+**00) is uncommon. So count the byte pairs that would be ASCII under
					// each byte order and pick the more frequent one.
					var evenLength = buf.length - (buf.length % 2);
					var scanEnd = Math.min(evenLength, 64); // Always even; at most 32 pairs.
					var likelyBE = 0;
					var likelyLE = 0;

					for (var pos = 0; pos < scanEnd; pos += 2) {
						var first = buf[pos];
						var second = buf[pos + 1];
						if (first === 0 && second !== 0)
							likelyBE++;
						else if (first !== 0 && second === 0)
							likelyLE++;
					}

					if (likelyBE > likelyLE)
						return 'utf-16be';
					if (likelyBE < likelyLE)
						return 'utf-16le';

					// Tie: keep the default.
					return fallback;
				}
				176
				177