// Copyright (C) 2016 Dmitry Chestnykh
// MIT License. See LICENSE file for details.

import { describe, expect, it } from 'vitest';
import * as hex from "@stablelib/hex";
import { encode, decode } from "./utf8.js";

describe("utf8", () => {
    // Round-trip check: each sample is passed through encode() and the result
    // through decode(); the output must equal the original input.
    it("should encode and decode strings", () => {
        const samples = [
            "abcdef",                 // ASCII only
            "☺☻☹",                    // non-ASCII symbols
            "абвгдеёжз",              // Cyrillic
            "abcгдеjzy123",           // ASCII interleaved with Cyrillic
            "こんにちは世界",           // Japanese
            "test 测试 тест",          // Latin, Chinese and Cyrillic with spaces
            "𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡",   // digits from outside the Basic Multilingual Plane
            "❤️",                     // emoji
        ];
        const roundTripped = samples.map(encode).map(decode);
        expect(roundTripped).toEqual(samples);
    });

    // Feeds decode() a catalogue of malformed UTF-8 byte sequences and expects
    // each one to be rejected with an error whose message matches /invalid/.
    it("should not decode malformed bytes", () => {
        // Each vector is a byte sequence written as space-separated hex.
        // Taken from https://hsivonen.fi/broken-utf-8/test.html
        const malformed = [
            // Overlong encodings of the lowest single-byte code point (U+0000)
            "C0 80",
            "E0 80 80",
            "F0 80 80 80",
            "F8 80 80 80 80",
            "FC 80 80 80 80 80",
            // Overlong encodings of the highest single-byte code point (U+007F)
            "C1 BF",
            "E0 81 BF",
            "F0 80 81 BF",
            "F8 80 80 81 BF",
            "FC 80 80 80 81 BF",
            // Overlong encodings of the lowest two-byte code point (U+0080)
            "E0 82 80",
            "F0 80 82 80",
            "F8 80 80 82 80",
            "FC 80 80 80 82 80",
            // Overlong encodings of the highest two-byte code point (U+07FF)
            "E0 9F BF",
            "F0 80 9F BF",
            "F8 80 80 9F BF",
            "FC 80 80 80 9F BF",
            // Overlong encodings of the lowest three-byte code point (U+0800)
            "F0 80 A0 80",
            "F8 80 80 A0 80",
            "FC 80 80 80 A0 80",
            // Overlong encodings of the highest three-byte code point (U+FFFF)
            "F0 8F BF BF",
            "F8 80 8F BF BF",
            "FC 80 80 8F BF BF",
            // Overlong encodings of the lowest four-byte code point (U+10000)
            "F8 80 90 80 80",
            "FC 80 80 90 80 80",
            // Overlong encodings of the last Unicode code point (U+10FFFF)
            "F8 84 8F BF BF",
            "FC 80 84 8F BF BF",
            // Out of range
            "F4 90 80 80",
            "FB BF BF BF BF",
            "FD BF BF BF BF BF",
            "ED A0 80",
            "ED BF BF",
            "ED A0 BD ED B2 A9",
            // Out of range and overlong at the same time
            "F8 84 90 80 80",
            "FC 80 84 90 80 80",
            "F0 8D A0 80",
            "F0 8D BF BF",
            "F0 8D A0 BD F0 8D B2 A9",
            // Lone trail (continuation) bytes
            "80",
            "80 80",
            "80 80 80",
            "80 80 80 80",
            "80 80 80 80 80",
            "80 80 80 80 80 80",
            "80 80 80 80 80 80 80",
            "C2 B6 80",
            "E2 98 83 80",
            "F0 9F 92 A9 80",
            "FB BF BF BF BF 80",
            "FD BF BF BF BF BF 80",
            // Truncated sequences
            "C2",
            "E2",
            "E2 98",
            "F0",
            "F0 9F",
            "F0 9F 92",
            // Leftovers
            "FE",
            "FE 80",
            "FF",
            "FF 80",
        ];

        for (const [index, hexBytes] of malformed.entries()) {
            // Strip the separating spaces before handing the text to the hex decoder.
            const bytes = hex.decode(hexBytes.replace(/ /g, ""));
            const attempt = () => {
                const result = decode(bytes);
                // Reached only when decode() did not throw, i.e. the expectation
                // below is about to fail: log which vector was wrongly accepted.
                console.log(index, "should not have decoded", hexBytes, "to", result);
            };
            expect(attempt).toThrowError(/invalid/);
        }
    });

    // Round-trips a very large string (8 * 1024 * 1024 UTF-16 code units)
    // through encode() and decode() and expects the input back unchanged.
    it("should decode a huge string", () => {
        // 1024 * 1024 copies of a chunk mixing Cyrillic and ASCII characters.
        // String.prototype.repeat yields exactly the same string as appending
        // the chunk in a loop, without a million separate concatenations.
        const s = "это test".repeat(1024 * 1024);
        const enc = encode(s);
        const dec = decode(enc);
        expect(dec).toEqual(s);
    });

    // encode() must throw an error matching /invalid string/ for any input that
    // contains a surrogate code unit without its partner, and must accept
    // properly paired surrogates.
    it("should reject invalid UTF-16 strings with unpaired surrogates", () => {
        const illFormed = [
            // High surrogate with no low surrogate after it
            '\ud800',
            '\udbff',
            // Low surrogate with no high surrogate before it
            '\udc00',
            '\udfff',
            // High surrogate as the last code unit of the string
            'hello\ud800',
            // Low surrogate as the first code unit of the string
            '\udc00world',
            // Two high surrogates in a row: the first one has no low surrogate
            // after it (the second one is followed by a low surrogate)
            '\ud800\ud800\udc00',
            // Low surrogate followed by high surrogate (wrong order)
            '\udc00\ud800',
        ];
        for (const input of illFormed) {
            expect(() => encode(input)).toThrowError(/invalid string/);
        }

        // Correctly paired surrogates must be accepted.
        const wellFormed = [
            '\ud800\udc00', // U+10000
            '\udbff\udfff', // U+10FFFF
        ];
        for (const input of wellFormed) {
            expect(() => encode(input)).not.toThrow();
        }
    });
});