import { bs } from "../../../lib/as-bs";
import { OBJECT, TOTAL_OVERHEAD } from "rt/common";
import { __heap_base } from "memory";
import { BACK_SLASH, QUOTE } from "../../custom/chars";
import { DESERIALIZE_ESCAPE_TABLE } from "../../globals/tables";
import { hex4_to_u16_swar } from "../../util/swar";

// The tables below describe how far an escape sequence spills past (READ) or
// falls short of (WRITE) one 8-byte block of four UTF-16 code units.
// Columns: input, lane (byte offset of the escape inside the block), layout
// with `|` marking the block boundary, and the overflow in bytes.

// Overflow Pattern for Unicode Escapes (READ)
// \u0001     0  \u00|01__   + 4
// -\u0001    2  -\u0|001_   + 6
// --\u0001   4  --\u|0001   + 8
// ---\u0001  6  ---\|u0001  + 10
// Formula: overflow = lane + 4

// Overflow Pattern for Unicode Escapes (WRITE)
// * = escape, _ = empty
// \u0001     0  *___|  - 6
// -\u0001    2  -*__|  - 4
// --\u0001   4  --*_|  - 2
// ---\u0001  6  ---*|  - 0
// Formula: 6 - lane

// Overflow pattern for Short Escapes (READ)
// \n--   0  \n--|   + 0
// -\n    2  -\n-|   + 0
// --\n   4  --\n|   + 0
// ---\n  6  ---\|n  + 2
// Formula: overflow = |lane - 4|
// NOTE(review): this formula does not reproduce the table above (lane 0 would
// give 4, the table says 0); the table matches max(0, lane - 4) - confirm.

// Overflow pattern for Short Escapes (WRITE)
// * = escape, _ = empty
// \n--   0  *--_  - 2
// -\n-   2  -*-_  - 2
// --\n   4  --*_  - 2
// ---\n  6  ---*  - 0
// Formula: overflow =
// NOTE(review): the formula was left blank in the original comment; only the
// table above is available.

/**
 * Copies a raw byte range into a freshly allocated object and returns it as a
 * string.
 *
 * NOTE(review): `changetype` and `idof` appear here without explicit type
 * arguments, which the AssemblyScript builtins require; they look stripped in
 * this copy of the file - confirm against the canonical source.
 *
 * @param srcStart address of the first byte to copy
 * @param byteLength number of bytes to copy (not a character count)
 * @returns the `""` literal when `byteLength` is 0, otherwise the new
 *          allocation holding a copy of the `byteLength` source bytes
 */
function copyStringFromSource(srcStart: usize, byteLength: usize): string {
  // Empty payload: no allocation, hand back the empty string literal.
  if (byteLength == 0) return changetype("");
  const out = __new(byteLength, idof());
  memory.copy(out, srcStart, byteLength);
  return changetype(out);
}

// Standalone (whole-value) escaped scanner. Quotes are already stripped, so
// `srcEnd` is the payload end and only `\` escapes are handled. Same HYBRID
// strategy as the field scanner: escape blocks use a free optimistic u64 store
// for the plain prefix; clean runs stream the first block then bulk-memcpy the
// remainder. The `backslash_mask_unsafe` hits are confirmed scalarly.
//
// NOTE: vs the prior overflow scanner this is faster on dense and sparse
// escaping but ~20% slower on sustained moderate-density escaping (escape
// every ~20 chars), where multi-escape-per-block had an edge.
//
// Parameters:
//   payloadStart - first byte of the string payload; [payloadStart, escapeStart)
//                  is copied verbatim before decoding starts.
//   escapeStart  - address where decoding begins (the caller in this file
//                  passes the first scalar-confirmed backslash).
//   srcEnd       - exclusive end of the payload.
// Returns `bs.sliceOut(outStart)`, where `outStart` is the write offset at
// entry - presumably the string made of everything written here (confirm
// against `bs.sliceOut`).
//
// NOTE(review): `load`/`store` appear without explicit type arguments in this
// copy, which the AssemblyScript builtins require; the access widths must be
// confirmed against the canonical source.
function deserializeEscapedString_SWAR(
  payloadStart: usize,
  escapeStart: usize,
  srcEnd: usize,
): string {
  const prefixLen = (escapeStart - payloadStart);
  // Remember where this value starts inside the shared output buffer.
  const outStart = bs.offset - bs.buffer;
  bs.ensureSize((srcEnd - payloadStart) + 8); // +8 slack for u64 overcopy
  if (prefixLen != 0) {
    memory.copy(bs.offset, payloadStart, prefixLen);
    bs.offset += prefixLen;
  }
  let srcStart = escapeStart;
  // Last address at which a full 8-byte block still fits before srcEnd.
  const srcEnd8 = srcEnd >= 8 ? srcEnd - 8 : 0;
  while (srcStart <= srcEnd8) {
    const block = load(srcStart);
    let mask = backslash_mask_unsafe(block);
    if (mask == 0) {
      // Clean block: stream it straight through.
      store(bs.offset, block);
      bs.offset += 8;
      srcStart += 8;
      if (
        srcStart <= srcEnd8 &&
        backslash_mask_unsafe(load(srcStart)) == 0
      ) {
        // The clean run continues: find its end, then copy it in one go.
        const runStart = srcStart;
        srcStart += 8;
        while (
          srcStart <= srcEnd8 &&
          backslash_mask_unsafe(load(srcStart)) == 0
        ) {
          srcStart += 8;
        }
        const runLen = (srcStart - runStart);
        memory.copy(bs.offset, runStart, runLen);
        bs.offset += runLen;
      }
      continue;
    }
    // Optimistic store of the whole block; bs.offset is advanced below only
    // as far as the plain prefix, so the remainder gets overwritten.
    store(bs.offset, block);
    let handled = false;
    do {
      // Bit index of the lowest hit -> byte offset inside the block.
      const laneIdx = usize(ctz(mask) >> 3);
      mask &= mask - 1; // clear the lowest set bit
      const srcIdx = srcStart + laneIdx;
      if ((load(srcIdx) & 0xffff) !== 0x5c) continue; // false positive
      bs.offset += laneIdx;
      const code = load(srcIdx, 2);
      if (code !== 0x75) {
        // Short escape: table lookup on the code after the backslash; the
        // source advances 4 bytes (two code units).
        store(bs.offset, load(DESERIALIZE_ESCAPE_TABLE + code));
        bs.offset += 2;
        srcStart = srcIdx + 4;
      } else {
        // 0x75 is 'u': decode the four hex digits that follow; the source
        // advances 12 bytes (six code units).
        store(bs.offset, hex4_to_u16_swar(load(srcIdx, 4)));
        bs.offset += 2;
        srcStart = srcIdx + 12;
      }
      handled = true;
      break;
    } while (mask != 0);
    if (!handled) {
      // Every hit was a false positive: keep the whole optimistic block.
      bs.offset += 8;
      srcStart += 8;
    }
  }
  // Scalar tail: one code unit at a time once no full block fits.
  while (srcStart < srcEnd) {
    const char = load(srcStart);
    if (char != BACK_SLASH) {
      store(bs.offset, char);
      bs.offset += 2;
      srcStart += 2;
      continue;
    }
    const code = load(srcStart, 2);
    if (code !== 0x75) {
      store(bs.offset, load(DESERIALIZE_ESCAPE_TABLE + code));
      bs.offset += 2;
      srcStart += 4;
    } else {
      store(bs.offset, hex4_to_u16_swar(load(srcStart, 4)));
      bs.offset += 2;
      srcStart += 12;
    }
  }
  return bs.sliceOut(outStart);
}

/**
 * Deserializes a quoted string value spanning `[srcStart, srcEnd)`.
 *
 * Both ends are moved inward by 2 bytes (one UTF-16 code unit each) to drop
 * the quotes. If the payload holds no backslash it is copied out directly;
 * otherwise decoding is delegated to `deserializeEscapedString_SWAR` from the
 * first scalar-confirmed backslash.
 *
 * NOTE(review): `srcEnd - 16` and `srcEnd - 8` are computed without a guard
 * and would wrap for very small `srcEnd` - presumably never the case for real
 * addresses; confirm. `load` also appears without type arguments in this copy.
 *
 * @param srcStart address of the opening quote
 * @param srcEnd address just past the closing quote
 * @returns the decoded string
 */
export function deserializeString_SWAR(srcStart: usize, srcEnd: usize): string {
  // Strip quotes
  srcStart += 2;
  srcEnd -= 2;
  const payloadStart = srcStart;
  // `do { ... } while (false)` is used as a breakable block: any `break`
  // means a possible escape was seen and falls through to the precise scan.
  do {
    const srcEnd16Fast = srcEnd - 16;
    while (srcStart < srcEnd16Fast) {
      const m0 = backslash_mask_unsafe(load(srcStart));
      const m1 = backslash_mask_unsafe(load(srcStart, 8));
      if ((m0 | m1) != 0) break;
      srcStart += 16;
    }
    if (srcStart < srcEnd16Fast) break;
    while (srcStart < srcEnd) {
      if (load(srcStart) == BACK_SLASH) break;
      srcStart += 2;
    }
    if (srcStart < srcEnd) break;
    // No backslash anywhere: plain copy of the payload.
    return copyStringFromSource(payloadStart, srcEnd - payloadStart);
  } while (false);
  // NOTE(review): the precise scan restarts at payloadStart instead of
  // resuming where the pre-scan stopped, so the clean prefix is scanned twice.
  srcStart = payloadStart;
  const srcEnd8 = srcEnd - 8;
  while (srcStart < srcEnd8) {
    const block = load(srcStart);
    let mask = backslash_mask_unsafe(block);
    if (mask === 0) {
      srcStart += 8;
      continue;
    }
    do {
      // Bit index of the lowest hit -> byte offset inside the block.
      const laneIdx = usize(ctz(mask) >> 3);
      mask &= mask - 1; // clear the lowest set bit
      const srcIdx = srcStart + laneIdx;
      const header = load(srcIdx);
      // Detect false positive (code unit where low byte is 0x5C)
      if ((header & 0xffff) !== 0x5c) continue;
      return deserializeEscapedString_SWAR(payloadStart, srcIdx, srcEnd);
    } while (mask !== 0);
    srcStart += 8;
  }
  // Scalar tail for the last few code units.
  while (srcStart < srcEnd) {
    if (load(srcStart) == BACK_SLASH) {
      return deserializeEscapedString_SWAR(payloadStart, srcStart, srcEnd);
    }
    srcStart += 2;
  }
  // Only false positives were found: the payload is escape-free.
  return copyStringFromSource(payloadStart, srcEnd - payloadStart);
}

// Writes into the destination field, reusing or resizing the backing string.
//
//   dstFieldPtr - address of the slot that holds the string pointer.
//   srcStart    - address of the bytes to copy into the string.
//   byteLength  - number of bytes to copy; 0 stores the `""` literal instead.
//
// A current slot value at or above `__heap_base` is treated as an existing
// heap object: it is reused when its header `rtSize` already equals
// `byteLength`, otherwise it is resized with `__renew`. Any lower value
// (presumably null or a static-data string - confirm) gets a fresh `__new`
// allocation.
//
// NOTE(review): `load`, `store`, `changetype` and `idof` appear without
// explicit type arguments in this copy, which the AssemblyScript builtins
// require; confirm against the canonical source.
function writeStringToField(
  dstFieldPtr: usize,
  srcStart: usize,
  byteLength: u32,
): void {
  if (byteLength == 0) {
    store(dstFieldPtr, changetype(""));
    return;
  }
  const current = load(dstFieldPtr);
  let stringPtr: usize;
  if (current >= __heap_base) {
    // The object header sits TOTAL_OVERHEAD bytes before the payload pointer.
    if (changetype(current - TOTAL_OVERHEAD).rtSize == byteLength) {
      stringPtr = current;
    } else {
      stringPtr = __renew(current, byteLength);
      store(dstFieldPtr, stringPtr);
    }
  } else {
    stringPtr = __new(byteLength, idof());
    store(dstFieldPtr, stringPtr);
  }
  memory.copy(stringPtr, srcStart, byteLength);
}

// Scans a quoted string value, writes into the destination field, and returns
// the next unread src pointer.
//
// HYBRID strategy (validated against the prior run-copy scanner across escape
// densities - see __benches__/custom/swar-string-deser-hybrid-h2h: +17–70%):
//   * Escape-bearing block: one optimistic whole-block u64 store copies the
//     plain prefix for free, then the (scalar-confirmed) escape is decoded.
//   * Clean block: stream the first one, then if the clean run continues switch
//     to one bulk memory.copy for the remainder.
// SWAR masks carry high-byte false positives, so each hit is confirmed
// scalarly before acting.
//
// Parameters:
//   payloadStart - first byte after the opening quote.
//   escapeStart  - address where decoding begins (the caller in this file
//                  passes the first confirmed backslash).
//   srcEnd       - exclusive end of the readable source.
//   dstFieldPtr  - slot that receives the decoded string.
// The scratch buffer is restarted at `bs.buffer` on entry and reset again
// before every exit. Aborts with "Unterminated string literal" when `srcEnd`
// is reached without a closing quote.
function deserializeEscapedStringField_SWAR(
  payloadStart: usize,
  escapeStart: usize,
  srcEnd: usize,
  dstFieldPtr: usize,
): usize {
  const prefixLen = (escapeStart - payloadStart);
  bs.offset = bs.buffer;
  bs.ensureSize((srcEnd - payloadStart) + 8); // +8 slack for u64 overcopy
  if (prefixLen != 0) {
    memory.copy(bs.buffer, payloadStart, prefixLen);
    bs.offset += prefixLen;
  }
  let srcStart = escapeStart;
  // Last address at which a full 8-byte block still fits before srcEnd.
  const srcEnd8 = srcEnd >= 8 ? srcEnd - 8 : 0;
  while (srcStart <= srcEnd8) {
    const block = load(srcStart);
    let mask = backslash_or_quote_mask(block);
    if (mask == 0) {
      // Clean block: stream it straight through.
      store(bs.offset, block);
      bs.offset += 8;
      srcStart += 8;
      if (
        srcStart <= srcEnd8 &&
        backslash_or_quote_mask(load(srcStart)) == 0
      ) {
        // The clean run continues: find its end, then copy it in one go.
        const runStart = srcStart;
        srcStart += 8;
        while (
          srcStart <= srcEnd8 &&
          backslash_or_quote_mask(load(srcStart)) == 0
        ) {
          srcStart += 8;
        }
        const runLen = (srcStart - runStart);
        memory.copy(bs.offset, runStart, runLen);
        bs.offset += runLen;
      }
      continue;
    }
    // Escape/quote block (mask may carry high-byte false positives).
    // The whole block is stored optimistically; bs.offset is advanced below
    // only as far as the plain prefix, so the remainder gets overwritten.
    store(bs.offset, block);
    let handled = false;
    do {
      // Bit index of the lowest hit -> byte offset inside the block.
      const laneIdx = usize(ctz(mask) >> 3);
      mask &= mask - 1; // clear the lowest set bit
      const srcIdx = srcStart + laneIdx;
      const char = load(srcIdx);
      if (char != QUOTE && char != BACK_SLASH) continue; // false positive
      bs.offset += laneIdx;
      if (char == QUOTE) {
        // Closing quote: flush the scratch buffer into the field.
        writeStringToField(
          dstFieldPtr,
          bs.buffer,
          (bs.offset - bs.buffer),
        );
        bs.offset = bs.buffer;
        return srcIdx + 2;
      }
      const code = load(srcIdx, 2);
      if (code !== 0x75) {
        // Short escape: table lookup; the source advances 4 bytes.
        store(bs.offset, load(DESERIALIZE_ESCAPE_TABLE + code));
        bs.offset += 2;
        srcStart = srcIdx + 4;
      } else {
        // 0x75 is 'u': decode four hex digits; the source advances 12 bytes.
        store(bs.offset, hex4_to_u16_swar(load(srcIdx, 4)));
        bs.offset += 2;
        srcStart = srcIdx + 12;
      }
      handled = true;
      break;
    } while (mask != 0);
    if (!handled) {
      // Every hit was a false positive: keep the whole optimistic block.
      bs.offset += 8;
      srcStart += 8;
    }
  }
  // scalar tail (< 8 bytes remaining)
  while (srcStart < srcEnd) {
    const char = load(srcStart);
    if (char == QUOTE) {
      writeStringToField(dstFieldPtr, bs.buffer, (bs.offset - bs.buffer));
      bs.offset = bs.buffer;
      return srcStart + 2;
    }
    if (char != BACK_SLASH) {
      store(bs.offset, char);
      bs.offset += 2;
      srcStart += 2;
      continue;
    }
    const code = load(srcStart, 2);
    if (code !== 0x75) {
      store(bs.offset, load(DESERIALIZE_ESCAPE_TABLE + code));
      bs.offset += 2;
      srcStart += 4;
    } else {
      store(bs.offset, hex4_to_u16_swar(load(srcStart, 4)));
      bs.offset += 2;
      srcStart += 12;
    }
  }
  bs.offset = bs.buffer;
  abort("Unterminated string literal");
  // Placed after the abort call; presumably only there to satisfy the
  // declared return type.
  return srcStart;
}

// Run-copy variant of the escaped field scanner: instead of streaming every
// block, it remembers `lastPtr` (start of the pending plain run) and copies
// [lastPtr, hit) into the scratch buffer only when a quote or escape is found.
//
// Parameters:
//   lastPtr     - start of the plain run that has not been copied yet.
//   srcStart    - address where scanning resumes.
//   srcEnd      - exclusive end of the readable source.
//   dstFieldPtr - slot that receives the decoded string.
// Returns the address just past the closing quote.
//
// NOTE(review): unlike deserializeEscapedStringField_SWAR this function
//   * never calls `bs.ensureSize` (the caller presumably reserves space),
//   * computes `srcEnd - 8` without the `srcEnd >= 8` guard, and
//   * returns `srcStart` silently instead of aborting when no closing quote
//     is found.
// Confirm these differences are intentional.
function deserializeEscapedStringContinuation_SWAR_MergedTuned(
  lastPtr: usize,
  srcStart: usize,
  srcEnd: usize,
  dstFieldPtr: usize,
): usize {
  const srcEnd8 = srcEnd - 8;
  while (srcStart <= srcEnd8) {
    const blockStart = srcStart;
    let mask = backslash_or_quote_mask(load(srcStart));
    if (mask === 0) {
      srcStart += 8;
      continue;
    }
    do {
      // Bit index of the lowest hit -> byte offset inside the block.
      const laneIdx = usize(ctz(mask) >> 3);
      mask &= mask - 1; // clear the lowest set bit
      const srcIdx = srcStart + laneIdx;
      const char = load(srcIdx);
      if (char == QUOTE) {
        // Flush the pending plain run, then publish the result.
        const runLen = (srcIdx - lastPtr);
        if (runLen != 0) {
          memory.copy(bs.offset, lastPtr, runLen);
          bs.offset += runLen;
        }
        writeStringToField(
          dstFieldPtr,
          bs.buffer,
          (bs.offset - bs.buffer),
        );
        bs.offset = bs.buffer;
        return srcIdx + 2;
      }
      if (char != BACK_SLASH) continue; // false positive
      const runLen = (srcIdx - lastPtr);
      if (runLen != 0) {
        memory.copy(bs.offset, lastPtr, runLen);
        bs.offset += runLen;
      }
      // One load covers the backslash and the code that follows; the code is
      // taken from the bits above bit 16.
      const chunk = load(srcIdx);
      const code = (chunk >> 16);
      if (code !== 0x75) {
        store(bs.offset, load(DESERIALIZE_ESCAPE_TABLE + code));
        bs.offset += 2;
        lastPtr = srcIdx + 4;
      } else {
        store(bs.offset, hex4_to_u16_swar(load(srcIdx, 4)));
        bs.offset += 2;
        lastPtr = srcIdx + 12;
      }
      srcStart = lastPtr;
      break;
    } while (mask !== 0);
    // No escape was decoded in this block (only false positives): move on.
    if (srcStart == blockStart) srcStart += 8;
  }
  // Scalar tail.
  while (srcStart < srcEnd) {
    const tailChar = load(srcStart);
    if (tailChar == QUOTE) {
      const runLen = (srcStart - lastPtr);
      if (runLen != 0) {
        memory.copy(bs.offset, lastPtr, runLen);
        bs.offset += runLen;
      }
      writeStringToField(dstFieldPtr, bs.buffer, (bs.offset - bs.buffer));
      bs.offset = bs.buffer;
      return srcStart + 2;
    }
    if (tailChar != BACK_SLASH) {
      srcStart += 2;
      continue;
    }
    const runLen = (srcStart - lastPtr);
    if (runLen != 0) {
      memory.copy(bs.offset, lastPtr, runLen);
      bs.offset += runLen;
    }
    const tailCode = load(srcStart, 2);
    if (tailCode !== 0x75) {
      store(bs.offset, load(DESERIALIZE_ESCAPE_TABLE + tailCode));
      bs.offset += 2;
      srcStart += 4;
    } else {
      store(bs.offset, hex4_to_u16_swar(load(srcStart, 4)));
      bs.offset += 2;
      srcStart += 12;
    }
    lastPtr = srcStart;
  }
  bs.offset = bs.buffer;
  return srcStart;
}

/**
 * Deserializes a quoted string starting at `srcStart` into the string field
 * at `dstObj + dstOffset`.
 *
 * Escape-free payloads are written to the field directly from the source.
 * At the first scalar-confirmed backslash the work is handed to
 * `deserializeEscapedStringField_SWAR`.
 *
 * Aborts with "Expected leading quote" when fewer than 2 bytes remain or the
 * first code unit is not a quote, and with "Unterminated string literal" when
 * `srcEnd` is reached without a closing quote.
 *
 * @param srcStart address of the opening quote
 * @param srcEnd exclusive end of the readable source
 * @param dstObj base address of the destination object
 * @param dstOffset byte offset of the string field inside `dstObj`
 * @returns the address just past the closing quote
 */
export function deserializeStringField_SWAR(
  srcStart: usize,
  srcEnd: usize,
  dstObj: usize,
  dstOffset: usize = 0,
): usize {
  const dstFieldPtr = dstObj + dstOffset;
  if (srcStart + 2 > srcEnd || load(srcStart) != QUOTE)
    abort("Expected leading quote");
  const payloadStart = srcStart + 2;
  srcStart = payloadStart;
  // Wide pre-scan: skip 16 bytes per iter while both halves are clean. The
  // common case (plain ASCII payloads, no escape) hits this loop exclusively
  // and is bound by load+SWAR throughput, not branch frequency.
  if (srcEnd >= 16) {
    const srcEnd16 = srcEnd - 16;
    while (srcStart <= srcEnd16) {
      // Test the first word before loading the second: short values and keys
      // close (or escape) within the first 8 bytes, so this skips the second
      // load on the common case while still skipping 16 bytes when both clean.
      if (backslash_or_quote_mask(load(srcStart)) != 0) break;
      if (backslash_or_quote_mask(load(srcStart, 8)) != 0) {
        srcStart += 8;
        break;
      }
      srcStart += 16;
    }
  }
  // NOTE(review): unlike the 16-byte pre-scan, this subtraction is not
  // guarded against srcEnd < 8 - presumably never the case for real addresses.
  const srcEnd8 = srcEnd - 8;
  while (srcStart <= srcEnd8) {
    let mask = backslash_or_quote_mask(load(srcStart));
    if (mask === 0) {
      srcStart += 8;
      continue;
    }
    do {
      // Bit index of the lowest hit -> byte offset inside the block.
      const laneIdx = usize(ctz(mask) >> 3);
      mask &= mask - 1; // clear the lowest set bit
      const srcIdx = srcStart + laneIdx;
      const char = load(srcIdx);
      if (char == QUOTE) {
        // Closing quote with no escape seen: copy straight from the source.
        writeStringToField(
          dstFieldPtr,
          payloadStart,
          (srcIdx - payloadStart),
        );
        return srcIdx + 2;
      }
      if (char != BACK_SLASH) continue; // false positive
      return deserializeEscapedStringField_SWAR(
        payloadStart,
        srcIdx,
        srcEnd,
        dstFieldPtr,
      );
    } while (mask !== 0);
    srcStart += 8;
  }
  // Scalar tail for the last few code units.
  while (srcStart < srcEnd) {
    const char = load(srcStart);
    if (char == QUOTE) {
      writeStringToField(
        dstFieldPtr,
        payloadStart,
        (srcStart - payloadStart),
      );
      return srcStart + 2;
    }
    if (char == BACK_SLASH) {
      return deserializeEscapedStringField_SWAR(
        payloadStart,
        srcStart,
        srcEnd,
        dstFieldPtr,
      );
    }
    srcStart += 2;
  }
  abort("Unterminated string literal");
  // Placed after the abort call; presumably only there to satisfy the
  // declared return type.
  return srcStart;
}

/**
 * Computes a mask over the four 16-bit lanes of `block`, flagging code units
 * whose low byte is an ASCII backslash (0x5C) or quote (0x22).
 *
 * WARNING: Matches in the high byte of a UTF-16 code unit are not filtered,
 * so callers must confirm the hit scalarly.
 * Each matching lane sets itself to 0x80.
 */
function backslash_or_quote_mask(block: u64): u64 {
  // XOR turns a matching low byte into 0x00; the (x - 1) & ~x test below then
  // raises bit 7 of every lane whose low byte is zero.
  const b = block ^ 0x005c_005c_005c_005c;
  const q = block ^ 0x0022_0022_0022_0022;
  return (
    (((q - 0x0001_0001_0001_0001) & ~q) | ((b - 0x0001_0001_0001_0001) & ~b)) &
    0x0080_0080_0080_0080
  );
}

/**
 * Computes a per-lane mask identifying UTF-16 code units whose **low byte**
 * is the ASCII backslash (`'\\'`, 0x5C).
 *
 * The mask is produced in two stages:
 *  1. Detects bytes equal to 0x5C using a SWAR equality test.
 *  2. Clears matches where the **high byte** of the UTF-16 code unit is
 *     non-zero, ensuring only real ASCII backslashes are reported.
 *
 * Each matching lane sets itself to 0x80.
 */
function backslash_mask(block: u64): u64 {
  const b = block ^ 0x005c_005c_005c_005c;
  const backslash_mask =
    (b - 0x0001_0001_0001_0001) & ~b & 0x0080_0080_0080_0080;
  // Same zero test applied to the high byte of each lane, then shifted down
  // by 8 so it lines up with bit 7 of the low-byte mask.
  const high_byte_mask =
    ~(
      ((block - 0x0100_0100_0100_0100) & ~block & 0x8000_8000_8000_8000) ^
      0x8000_8000_8000_8000
    ) >> 8;
  return backslash_mask & high_byte_mask;
}

/**
 * Computes a per-lane mask identifying UTF-16 code units whose **low byte**
 * is the ASCII backslash (`'\\'`, 0x5C).
 *
 * Each matching lane sets itself to 0x80.
 *
 * WARNING: Only the low byte is tested, so a code unit whose low byte is 0x5C
 * but whose high byte is non-zero (e.g. U+015C) is reported as well.
 * This is useful for a hot path where it is possible to detect the false positive scalarly.
 */
function backslash_mask_unsafe(block: u64): u64 {
  const b = block ^ 0x005c_005c_005c_005c;
  const backslash_mask =
    (b - 0x0001_0001_0001_0001) & ~b & 0x0080_0080_0080_0080;
  return backslash_mask;
}