import {
  BACK_SLASH,
  BRACE_LEFT,
  BRACE_RIGHT,
  BRACKET_LEFT,
  BRACKET_RIGHT,
  COLON,
  COMMA,
  QUOTE,
} from "../custom/chars";
import { isSpace } from "./isSpace";

// SWAR analogue of `scanValueEndSimd.ts`, processing four UTF-16 lanes per
// 64-bit word for the SWAR build mode (no SIMD feature). Each mask is a fast
// FILTER - a matched lane is re-checked with a real `load` before acting -
// so the masks may over-match non-ASCII lanes whose low byte equals a target
// (the verify rejects them). Lane byte offset within a hit word is
// `ctz(mask) >> 3` (detection bit sits at lane*16 + 7).
const ONES: u64 = 0x0001_0001_0001_0001;
const HI: u64 = 0x0080_0080_0080_0080;

/**
 * 16-bit-lane "equals" partial (pre-`& HI`); OR several, then `& HI` once.
 *
 * After masking with `HI`, a set bit marks a lane that is a CANDIDATE for
 * being equal to the splat's lane value. It is a filter only: callers must
 * re-load the lane and compare before acting on it.
 *
 * @param block four UTF-16 code units packed into one 64-bit word
 * @param splat the target code unit replicated into all four lanes
 * @returns the unmasked partial; combine with `& HI` to get detection bits
 */
function eqPart(block: u64, splat: u64): u64 {
  // A lane of `t` has a zero low byte wherever `block` matches `splat` there.
  const t = block ^ splat;
  return (t - ONES) & ~t;
}

const S_QUOTE: u64 = 0x0022_0022_0022_0022;
const S_BACK_SLASH: u64 = 0x005c_005c_005c_005c;
const S_BRACKET_LEFT: u64 = 0x005b_005b_005b_005b;
const S_BRACKET_RIGHT: u64 = 0x005d_005d_005d_005d;
// Clears bit 5 (0x20) of each lane, folding `{`/`}` onto `[`/`]`.
const FOLD: u64 = 0xffdf_ffdf_ffdf_ffdf;

/**
 * Filter for lanes equal to `"` or `\`.
 *
 * @param block four UTF-16 code units packed into one 64-bit word
 * @returns a mask with one detection bit per candidate lane (0 if none)
 */
function quoteOrBackslashMask(block: u64): u64 {
  return (eqPart(block, S_QUOTE) | eqPart(block, S_BACK_SLASH)) & HI;
}

/**
 * Filter for lanes equal to `"`, `{`, `}`, `[`, or `]` - the only bytes that,
 * outside a string, change depth or open a string. As with the other SWAR
 * masks, a hit is a candidate to verify with a real load (it may over-match a
 * non-ASCII lane whose low byte collides).
 *
 * @param block four UTF-16 code units packed into one 64-bit word
 * @returns a mask with one detection bit per candidate lane (0 if none)
 */
function structuralOrQuoteMask(block: u64): u64 {
  // Folding lets one comparison cover both the brace and the bracket form.
  const folded = block & FOLD;
  return (
    (eqPart(folded, S_BRACKET_LEFT) |
      eqPart(folded, S_BRACKET_RIGHT) |
      eqPart(block, S_QUOTE)) &
    HI
  );
}

/**
 * Finds the end of a quoted string value.
 *
 * @param srcStart address of the opening quote (it is skipped, not checked)
 * @param srcEnd address one past the last readable byte
 * @returns the address just past the closing quote, or 0 when no closing
 *          quote is found before `srcEnd`
 */
function scanQuotedValueEnd_SWAR(srcStart: usize, srcEnd: usize): usize {
  srcStart += 2; // step over the opening quote (one UTF-16 code unit)
  // Last address from which a full 8-byte word can still be loaded.
  const srcEnd8 = srcEnd >= 8 ? srcEnd - 8 : 0;

  // Fast-skip 8-byte windows until a real quote (return) or a backslash, then
  // hand off to the precise scalar tail (which resolves escape runs). The mask
  // is a filter, so each candidate lane is verified with a real `load`;
  // non-ASCII lanes that spuriously match are skipped (neither quote nor
  // backslash), and `srcStart` is left at the window start for the tail.
  while (srcStart <= srcEnd8) {
    let mask = quoteOrBackslashMask(load<u64>(srcStart));
    if (mask == 0) {
      srcStart += 8;
      continue;
    }
    do {
      const srcIdx = srcStart + (usize(ctz(mask)) >> 3);
      mask &= mask - 1; // drop the lowest candidate lane
      const code = load<u16>(srcIdx);
      if (code == QUOTE) return srcIdx + 2;
      if (code == BACK_SLASH) break;
    } while (mask != 0);
    // Any window with a candidate ends the fast path; the tail re-walks it.
    break;
  }

  // Resolve escapes by consuming a backslash *and the char it escapes* together,
  // so escape parity is tracked exactly. A look-back `prev != BACK_SLASH` test is
  // wrong for an escaped backslash: in `"x\\"` the closing quote follows a `\`
  // (the second of the pair) yet still closes the string.
  while (srcStart < srcEnd) {
    const code = load<u16>(srcStart);
    if (code == BACK_SLASH) {
      srcStart += 4;
      continue;
    }
    if (code == QUOTE) return srcStart + 2;
    srcStart += 2;
  }
  return 0;
}

/**
 * Finds the end of an object or array value by tracking nesting depth.
 * `{`/`[` both open and `}`/`]` both close; the two bracket kinds are not
 * matched against each other.
 *
 * @param srcStart address of the opening `{` or `[` (skipped, not checked)
 * @param srcEnd address one past the last readable byte
 * @returns the address just past the closer that brings depth back to zero,
 *          or 0 when the composite (or a string inside it) is unterminated
 */
function scanCompositeValueEnd_SWAR(srcStart: usize, srcEnd: usize): usize {
  // Process structural tokens scalar-side, but bulk-skip the bytes between them:
  // nested string VALUES via the SWAR quoted scan, and runs of digits /
  // punctuation / whitespace (numeric arrays) via a SWAR hunt for the next
  // `"`/`{`/`}`/`[`/`]`.
  let depth: i32 = 1;
  let ptr = srcStart + 2;
  // Last address from which a full 8-byte word can still be loaded.
  const srcEnd8 = srcEnd >= 8 ? srcEnd - 8 : 0;

  while (ptr < srcEnd) {
    const code = load<u16>(ptr);
    if (code == QUOTE) {
      ptr = scanQuotedValueEnd_SWAR(ptr, srcEnd);
      if (!ptr) return 0; // unterminated string inside the composite
      continue;
    }
    // Clear bit 5 so `{`/`}` compare equal to `[`/`]`.
    const folded = code & 0xffdf;
    if (folded == BRACKET_LEFT) {
      depth++;
      ptr += 2;
      continue;
    }
    if (folded == BRACKET_RIGHT) {
      if (--depth == 0) return ptr + 2;
      ptr += 2;
      continue;
    }
    ptr += 2;
    // `,`/`:` sit one byte from the next token - stay scalar (string-dense
    // objects); other fillers can run long, so SWAR-skip past them.
    if (code == COMMA || code == COLON) continue;
    while (ptr <= srcEnd8) {
      const mask = structuralOrQuoteMask(load<u64>(ptr));
      if (mask == 0) {
        ptr += 8;
        continue;
      }
      const idx = ptr + (usize(ctz(mask)) >> 3);
      const c = load<u16>(idx);
      const f = c & 0xffdf;
      if (c == QUOTE || f == BRACKET_LEFT || f == BRACKET_RIGHT) {
        ptr = idx; // real token - the outer loop processes it
        break;
      }
      ptr = idx + 2; // spurious lane match - keep scanning
    }
  }
  return 0;
}

/**
 * Finds the end of a scalar value.
 *
 * @param srcStart address of the first code unit of the scalar
 * @param srcEnd address one past the last readable byte
 * @returns the address of the first `,`, `]`, `}` or whitespace code unit,
 *          or the loop's final position (at or past `srcEnd`) when no
 *          terminator is found
 */
function scanScalarValueEnd_SWAR(srcStart: usize, srcEnd: usize): usize {
  // Scalars (number/true/false/null) are short, so a plain scalar terminator
  // scan beats setting up SWAR masks per word.
  while (srcStart < srcEnd) {
    const code = load<u16>(srcStart);
    if (
      code == COMMA ||
      code == BRACKET_RIGHT ||
      code == BRACE_RIGHT ||
      isSpace(code)
    )
      return srcStart;
    srcStart += 2;
  }
  return srcStart;
}

/**
 * SWAR `scanValueEnd`: position just past the value at `srcStart`. Strings and
 * objects/arrays use the SWAR token scans above; scalars use a short scalar
 * loop. Returns 0 on empty input or an unterminated string/composite.
 *
 * The type parameter `T` only feeds the static type checks in the first three
 * branches; the untyped checks after them cover every other case, so the
 * result depends solely on the first code unit.
 *
 * @param srcStart address of the first code unit of the value
 * @param srcEnd address one past the last readable byte
 * @returns the end position of the value, or 0 as described above
 */
export function scanValueEnd_SWAR<T>(srcStart: usize, srcEnd: usize): usize {
  if (srcStart >= srcEnd) return 0;
  const first = load<u16>(srcStart);

  // Statically typed shortcuts.
  if (isString<nonnull<T>>() && first == QUOTE)
    return scanQuotedValueEnd_SWAR(srcStart, srcEnd);
  if (isArray<nonnull<T>>() && first == BRACKET_LEFT)
    return scanCompositeValueEnd_SWAR(srcStart, srcEnd);
  if (
    (isManaged<nonnull<T>>() || isReference<nonnull<T>>()) &&
    first == BRACE_LEFT
  )
    return scanCompositeValueEnd_SWAR(srcStart, srcEnd);

  // Generic dispatch on the first code unit.
  if (first == QUOTE) return scanQuotedValueEnd_SWAR(srcStart, srcEnd);
  if (first == BRACE_LEFT || first == BRACKET_LEFT)
    return scanCompositeValueEnd_SWAR(srcStart, srcEnd);
  return scanScalarValueEnd_SWAR(srcStart, srcEnd);
}