import { bs } from "../../../lib/as-bs";
import { BACK_SLASH, QUOTE } from "../../custom/chars";
import { SERIALIZE_ESCAPE_TABLE } from "../../globals/tables";
import { u16_to_hex4_swar } from "../../util/swar";
import { OBJECT, TOTAL_OVERHEAD } from "rt/common";

// The four UTF-16LE code units `\u00` packed into one 64-bit word
// (0x0030_0030_0075_005C).
// @ts-expect-error: @lazy is a valid decorator
@lazy const U00_MARKER = 13511005048209500;

// The two UTF-16LE code units `\u` packed into one 32-bit word (0x0075_005C).
// @ts-expect-error: @lazy is a valid decorator
@lazy const U_MARKER = 7667804;

/**
 * Serializes `src` as a JSON string literal and appends it at `bs.offset`.
 *
 * The output is UTF-16: an opening quote, the escaped payload, and a closing
 * quote. Quotes, backslashes and code units below 0x20 are replaced through
 * `SERIALIZE_ESCAPE_TABLE`; unpaired surrogates are written as `\uXXXX`;
 * everything else is copied through unchanged.
 *
 * Two passes are used:
 *  1. A fast path that copies the string verbatim as long as every code unit
 *     is plain ASCII that needs no escaping. It only commits (`bs.offset` is
 *     updated) when the whole string qualified.
 *  2. A general path that restarts from the beginning of the string, scans
 *     four code units at a time with `detect_escapable_u64_swar_safe`, and
 *     finishes the remaining code units one by one.
 *
 * NOTE(review): the buffer sizing relies on `bs.proposeSize` / `bs.growSize`,
 * whose exact contracts are defined outside this file.
 *
 * @param src String to serialize.
 */
export function serializeString_SWAR(src: string): void {
  let srcStart = changetype<usize>(src);
  const srcInitial = srcStart;
  // Byte length of the string payload, read from the runtime object header.
  const srcSize = changetype<OBJECT>(srcStart - TOTAL_OVERHEAD).rtSize;
  const srcEnd = srcStart + srcSize;

  // ---- Fast path: nothing to escape, pure ASCII --------------------------
  // `do { ... } while (false)` is used as a breakable block: any `break`
  // abandons the fast path without having touched `bs.offset`.
  do {
    const srcEnd8Fast = srcEnd - 8;
    bs.proposeSize(srcSize + 4);
    const dstStart = bs.offset;
    // Leave room for the opening quote, which is written only on success.
    let dst = dstStart + 2;

    while (srcStart < srcEnd8Fast) {
      const block = load<u64>(srcStart);
      // Any non-zero high byte means a non-Latin-1 code unit.
      if ((block & 0xff00_ff00_ff00_ff00) != 0) break;
      const lo = block & 0x00ff_00ff_00ff_00ff;
      // Bit 7 of a lane is set when its low byte is < 0x20, '"' or '\\'.
      // A borrow may leak into the next lane here, but only when the lower
      // lane is itself flagged, so it cannot hide a code unit that needs
      // escaping.
      const asciiMask =
        ((lo - 0x0020_0020_0020_0020) |
          ((lo ^ 0x0022_0022_0022_0022) - 0x0001_0001_0001_0001) |
          ((lo ^ 0x005c_005c_005c_005c) - 0x0001_0001_0001_0001)) &
        (0x0080_0080_0080_0080 & ~lo);
      if (asciiMask != 0) break;
      store<u64>(dst, block);
      srcStart += 8;
      dst += 8;
    }
    // Left the loop early: something needs the general path.
    if (srcStart < srcEnd8Fast) break;

    while (srcStart <= srcEnd - 2) {
      const code = load<u16>(srcStart);
      if (code > 0x7f || code == BACK_SLASH || code == QUOTE || code < 32) break;
      store<u16>(dst, code);
      srcStart += 2;
      dst += 2;
    }
    if (srcStart <= srcEnd - 2) break;

    store<u16>(dstStart, QUOTE);
    store<u16>(dst, QUOTE);
    bs.offset = dst + 2;
    return;
  } while (false);

  // ---- General path: restart from the beginning --------------------------
  srcStart = srcInitial;
  const srcEnd8 = srcEnd - 8;
  bs.proposeSize(srcSize + 4);

  store<u16>(bs.offset, 34); // "
  bs.offset += 2;

  while (srcStart < srcEnd8) {
    const block = load<u64>(srcStart);
    let mask = detect_escapable_u64_swar_safe(block);

    // Optimistically copy the raw block; flagged lanes are patched below.
    store<u64>(bs.offset, block);

    if (mask === 0) {
      srcStart += 8;
      bs.offset += 8;
      continue;
    }

    do {
      // Byte index (0..7) of the lowest flagged byte inside the block.
      const laneIdx = usize(ctz(mask) >> 3);
      const srcIdx = srcStart + laneIdx;

      // Even (0 2 4 6) -> Confirmed ASCII Escape
      // Odd  (1 3 5 7) -> Possibly a Unicode code unit or surrogate
      if ((laneIdx & 1) === 0) {
        mask &= mask - 1;
        const code = load<u16>(srcIdx);
        // Escape table entries are 4 bytes wide, indexed by code unit.
        const escaped = load<u32>(SERIALIZE_ESCAPE_TABLE + (code << 2));

        if ((escaped & 0xffff) != BACK_SLASH) {
          // Entry holds two hex digits: emit `\u00` + digits (12 bytes
          // replacing 2), then re-copy the source that follows.
          bs.growSize(10);
          const dstIdx = bs.offset + laneIdx;
          store<u64>(dstIdx, U00_MARKER);
          store<u32>(dstIdx, escaped, 8);
          store<u64>(dstIdx, load<u64>(srcIdx, 2), 12);
          bs.offset += 10;
        } else {
          // Entry holds a two-unit escape such as `\n` (4 bytes replacing 2).
          bs.growSize(2);
          const dstIdx = bs.offset + laneIdx;
          store<u32>(dstIdx, escaped);
          store<u64>(dstIdx, load<u64>(srcIdx, 2), 4);
          bs.offset += 2;
        }
        continue;
      }

      mask &= mask - 1;
      // The flag sits on the high byte; the code unit starts one byte earlier.
      const code = load<u16>(srcIdx - 1);
      // Non-surrogate code units were already copied verbatim.
      if (code < 0xd800 || code > 0xdfff) continue;

      if (code <= 0xdbff) {
        // High surrogate: valid when immediately followed by a low surrogate.
        if (srcIdx + 2 < srcEnd) {
          const next = load<u16>(srcIdx, 1);
          if (next >= 0xdc00 && next <= 0xdfff) {
            // Paired surrogate: also drop the low half's flag when it lies
            // in this block (a no-op when the mask is already empty).
            mask &= mask - 1;
            continue;
          }
        }
      } else if (srcIdx - 1 > srcInitial) {
        // Low surrogate whose flag was not consumed above. This happens when
        // its high half was the last code unit of the previous block; the
        // pair is valid and both halves are already copied verbatim.
        const prev = load<u16>(srcIdx - 3);
        if (prev >= 0xd800 && prev <= 0xdbff) continue;
      }

      bs.growSize(10);
      // unpaired high/low surrogate
      const dstIdx = bs.offset + laneIdx - 1;
      store<u32>(dstIdx, U_MARKER); // \u
      store<u64>(dstIdx, u16_to_hex4_swar(code), 4);
      store<u64>(dstIdx, load<u64>(srcIdx, 1), 12);
      bs.offset += 10;
    } while (mask !== 0);

    srcStart += 8;
    bs.offset += 8;
  }

  // ---- Scalar tail: remaining code units, one at a time ------------------
  while (srcStart <= srcEnd - 2) {
    const code = load<u16>(srcStart);

    if (code == BACK_SLASH || code == QUOTE || code < 32) {
      const escaped = load<u32>(SERIALIZE_ESCAPE_TABLE + (code << 2));
      if ((escaped & 0xffff) != BACK_SLASH) {
        bs.growSize(10);
        store<u64>(bs.offset, U00_MARKER);
        store<u32>(bs.offset, escaped, 8);
        bs.offset += 12;
      } else {
        bs.growSize(2);
        store<u32>(bs.offset, escaped);
        bs.offset += 4;
      }
      srcStart += 2;
      continue;
    }

    if (code < 0xd800 || code > 0xdfff) {
      store<u16>(bs.offset, code);
      bs.offset += 2;
      srcStart += 2;
      continue;
    }

    if (code <= 0xdbff) {
      if (srcStart + 2 <= srcEnd - 2) {
        const next = load<u16>(srcStart, 2);
        if (next >= 0xdc00 && next <= 0xdfff) {
          // valid surrogate pair
          store<u16>(bs.offset, code);
          store<u16>(bs.offset + 2, next);
          bs.offset += 4;
          srcStart += 4;
          continue;
        }
      }
    } else if (srcStart > srcInitial) {
      // Low surrogate at the head of the tail whose high half was the last
      // code unit handled by the block loop: the pair is valid, copy it raw.
      const prev = load<u16>(srcStart - 2);
      if (prev >= 0xd800 && prev <= 0xdbff) {
        store<u16>(bs.offset, code);
        bs.offset += 2;
        srcStart += 2;
        continue;
      }
    }

    // unpaired high/low surrogate
    write_u_escape(code);
    srcStart += 2;
    continue;
  }

  store<u16>(bs.offset, 34); // "
  bs.offset += 2;
}

/**
 * Appends `code` at `bs.offset` as a six-code-unit `\uXXXX` escape and
 * advances `bs.offset` by 12 bytes.
 *
 * @param code UTF-16 code unit to escape (used here for lone surrogates).
 */
function write_u_escape(code: u16): void {
  bs.growSize(10);
  store<u32>(bs.offset, U_MARKER); // "\u"
  store<u64>(bs.offset, u16_to_hex4_swar(code), 4);
  bs.offset += 12;
}

/**
 * Classifies the four UTF-16 code units packed in `block`.
 *
 * For each 16-bit lane the result has:
 *  - bit 15 set when the lane's high byte is non-zero (the code unit is
 *    >= 0x0100 and may be a surrogate), or otherwise
 *  - bit 7 set when the lane is below 0x20, a double quote (0x22) or a
 *    backslash (0x5c).
 *
 * A lane never has both bits set, and a result of 0 means the block can be
 * copied verbatim.
 *
 * @param block Four little-endian UTF-16 code units.
 * @returns The per-lane flag mask described above.
 */
export function detect_escapable_u64_swar_safe(block: u64): u64 {
  const hi = block & 0xff00_ff00_ff00_ff00;
  const lo = block & 0x00ff_00ff_00ff_00ff;

  // Setting bit 8 of each 16-bit lane (high byte LSB) prevents borrow from a
  // low byte underflow from propagating across lane boundaries into the next lane.
  const loSafe = lo | 0x0100_0100_0100_0100;
  const ascii_mask =
    ((loSafe - 0x0020_0020_0020_0020) |
      ((loSafe ^ 0x0022_0022_0022_0022) - 0x0001_0001_0001_0001) |
      ((loSafe ^ 0x005c_005c_005c_005c) - 0x0001_0001_0001_0001)) &
    (0x0080_0080_0080_0080 & ~lo);

  if (hi == 0) return ascii_mask;

  // Exact "high byte != 0" test per lane. Each high byte is moved to the
  // bottom of its lane and 0xff is added: bit 8 becomes set iff the byte was
  // non-zero, and the sum (at most 0x1fe) cannot carry into the next lane.
  // A subtract-based zero test is not safe here: its borrow crosses lanes
  // and misclassifies a lane equal to 0x0100 that follows a lane < 0x0100.
  const hi_mask = (((hi >> 8) + 0x00ff_00ff_00ff_00ff) & 0x0100_0100_0100_0100) << 7;

  // Drop the ASCII flag of every lane whose high byte is non-zero.
  return (ascii_mask & (~hi_mask >> 8)) | hi_mask;
}