import { ArrayType } from './types';

/** Bias removed when a UTF-16 surrogate pair is merged into one code point.
  * The encoder computes (high << 10) + low, which still contains the
  * surrogate range markers 0xd800 (shifted) and 0xdc00 and lacks the
  * 0x10000 base of the supplementary planes. Subtracting this constant
  * corrects all three at once. Numerically equal to 0x35fdc00. */
const surrogateOffset = (0xd800 << 10) + 0xdc00 - 0x10000;

/** Local alias of String.fromCharCode, so that the property lookup is
  * spelled out only once (helps minifiers shorten every call site). */
const { fromCharCode } = String;

export function encodeUTF8(src: string): number[];
export function encodeUTF8(
	src: string,
	dst: ArrayType,
	dstPos?: number,
	srcPos?: number,
	srcEnd?: number
): number;

/** UTF-8 encode a string to an array of bytes.
  * This transform cannot fail and is reversible for any input string,
  * regardless of strange or invalid characters (handled using WTF-8):
  * a well-formed surrogate pair becomes one 4-byte sequence, while an
  * unpaired surrogate is stored as-is in a 3-byte sequence.
  *
  * Only the UTF-16 code units in [srcPos, srcEnd) are read. A high
  * surrogate that is the last unit of that window is treated as unpaired,
  * even if the string continues with a matching low surrogate.
  *
  * @param src String to encode.
  * @param dst Destination array or buffer for storing the result.
  * @param dstPos Initial offset to destination, default is 0.
  * @param srcPos Initial offset to source data, default is 0.
  * @param srcEnd Source data end offset, default is its length.
  * Values past the end of the string are clamped to its length.
  *
  * @return End offset past data stored if a destination was given,
  * otherwise a numeric array containing the encoded result.
  * Note that output length cannot exceed 3 * input length. */

export function encodeUTF8(
	src: string,
	dst?: ArrayType,
	dstPos = 0,
	srcPos = 0,
	srcEnd = src.length
) {
	let result: number[] | undefined;
	let code: number;
	let a: number, b: number;

	// Without a destination, collect output in a fresh array and return it.
	dst = dst || (result = []);

	// charCodeAt returns NaN past the end of the string, which would be
	// written out as garbage. Never read beyond the actual data.
	if(srcEnd > src.length) srcEnd = src.length;

	while(srcPos < srcEnd) {
		code = src.charCodeAt(srcPos++);

		if(code >= 0x80) {
			// Lead byte prefix of a 2-byte sequence.
			b = 0b11000000;

			if(code >= 0x800) {
				// 3-byte sequence: "a" prefixes the lead byte and
				// "b" becomes the prefix of a continuation byte.
				a = 0b11100000;
				b = 0b10000000;

				// Note: code <= 0xffff because JavaScript API exposes strings
				// only as a 16-bit, UTF-16 encoded buffer.

				if((code & 0xfc00) === 0xd800) {
					// Surrogate pair first half. Look ahead only inside
					// the requested window; 0 never matches a second half.
					const next = srcPos < srcEnd ? src.charCodeAt(srcPos) : 0;

					if((next & 0xfc00) === 0xdc00) {
						// Surrogate pair second half. Re-encode only if both
						// halves are in the valid range. Otherwise store them
						// as-is, to avoid altering decoded result.

						// 4-byte sequence: the byte written further below
						// using "a" is now a continuation byte.
						a = 0b10000000;
						code = (code << 10) + next - surrogateOffset;
						dst[dstPos++] = 0b11110000 | (code >> 18);
						++srcPos;
					}
				}

				dst[dstPos++] = a | ((code >> 12) & 0b00111111);
			}

			dst[dstPos++] = b | ((code >> 6) & 0b00111111);
			// Final continuation byte holds the lowest 6 bits.
			code = 0b10000000 | (code & 0b00111111);
		}

		dst[dstPos++] = code;
	}

	return result || dstPos;
}

/** UTF-8 decode an array of bytes into a string.
  * Invalid surrogate pairs are left as-is to support WTF-8.
  * Stray continuation bytes, the bytes 0xfe / 0xff, sequences cut short
  * and code points above 0x10ffff become replacement characters (fffd).
  * Note that overlong encodings are not rejected; they decode to the
  * code point their payload bits spell out.
  *
  * Only the bytes in [srcPos, srcEnd) are read. A multi-byte sequence
  * that runs past srcEnd counts as cut short, even if the array holds
  * further continuation bytes beyond that offset.
  *
  * @param src Array to decode.
  * @param dst Output string prefix, default is empty.
  * @param srcPos Initial offset to source data, default is 0.
  * @param srcEnd Source data end offset, default is its length.
  * Values past the end of the array are clamped to its length.
  *
  * @return Decoded string, appended to the given prefix. */

export function decodeUTF8(
	src: number[],
	dst = '',
	srcPos = 0,
	srcEnd = src.length
) {
	let code: number;
	let part: number;
	let mask: number;

	// Reading past the array yields undefined, which would decode as NUL.
	if(srcEnd > src.length) srcEnd = src.length;

	while(srcPos < srcEnd) {
		// TODO: Ensure code is below 0xff and also does not overflow u32 after decoding!
		code = src[srcPos++];

		if(code & 0b10000000) {
			// Bit tested against the lead byte's unary length prefix.
			// It starts just above the 6 payload bits of one continuation
			// byte and advances by 5 per byte read, while the lead byte
			// moves up by 6, so successive prefix bits get examined.
			mask = 0b1000000;

			if(code < 0b11000000 || code > 0b11111101) {
				// Excessive continuation byte(s) detected.
				// Consume them all, but stay inside the source window.
				while(
					srcPos < srcEnd &&
					(src[srcPos] & 0b11000000) === 0b10000000
				) {
					++srcPos;
				}

				// Emit a replacement character to signal error.
				code = 0xfffd;
			} else {
				do {
					// Peek at the continuation byte. Outside the window
					// use 0, which never looks like a continuation byte.
					part = srcPos < srcEnd ? src[srcPos] : 0;

					if((part & 0b11000000) !== 0b10000000) {
						// Missing continuation byte. Leave the offending
						// byte unconsumed so it gets decoded on its own.
						// Emit a replacement character to signal error.
						code = 0xfffd;
						// Makes the final masking below a no-op (& -1).
						mask = 0;
						break;
					}

					++srcPos;
					// Append to character code.
					code = (code << 6) | (part & 0b111111);
					// Move to next unary digit of byte length prefix.
					mask <<= 5;
				} while(code & mask);

				// Mask out byte length prefix.
				code &= mask - 1;
			}
		}

		if(code > 0xffff) {
			if(code > 0x10ffff) {
				// Out of range supported by UTF-16.
				code = 0xfffd;
			} else {
				// Split into a surrogate pair. 0xd7c0 is 0xd800 minus
				// (0x10000 >> 10), folding the plane offset into the base.
				dst += fromCharCode(0xd7c0 + (code >>> 10));
				code = 0xdc00 | (code & 0b1111111111);
			}
		}

		dst += fromCharCode(code);
	}

	return dst;
}