{"version":3,"sources":["../../src/index.ts","../../src/pseudo_url.ts"],"sourcesContent":["export * from './pseudo_url';\n","import { inspect } from 'node:util';\n\nimport log from '@apify/log';\n\n/**\n * Represents a pseudo-URL (PURL) - a URL pattern used to find\n * the matching URLs on a page or html document.\n *\n * A PURL is simply a URL with special directives enclosed in `[]` brackets.\n * Currently, the only supported directive is `[RegExp]`,\n * which defines a JavaScript-style regular expression to match against the URL.\n *\n * The `PseudoUrl` class can be constructed either using a pseudo-URL string\n * or a regular expression (an instance of the `RegExp` object).\n * With a pseudo-URL string, the matching is always case-insensitive.\n * If you need case-sensitive matching, use an appropriate `RegExp` object.\n *\n * Internally, `PseudoUrl` class is using `purlToRegExp` function which parses the provided PURL\n * and converts it to an instance of the `RegExp` object (in case it's not).\n *\n * For example, a PURL `http://www.example.com/pages/[(\\w|-)*]` will match all of the following URLs:\n *\n * - `http://www.example.com/pages/`\n * - `http://www.example.com/pages/my-awesome-page`\n * - `http://www.example.com/pages/something`\n *\n * Be careful to correctly escape special characters in the pseudo-URL string.\n * If either `[` or `]` is part of the normal query string, it must be encoded as `[\\x5B]` or `[\\x5D]`,\n * respectively. For example, the following PURL:\n * ```http\n * http://www.example.com/search?do[\\x5B]load[\\x5D]=1\n * ```\n * will match the URL:\n * ```http\n * http://www.example.com/search?do[load]=1\n * ```\n *\n * If the regular expression in the pseudo-URL contains a backslash character (\\),\n * you need to escape it with another back backslash, as shown in the example below.\n *\n * **Example usage:**\n *\n * ```javascript\n * // Using a pseudo-URL string\n * const purl = new PseudoUrl('http://www.example.com/pages/[(\\\\w|-)+]');\n *\n * // Using a regular expression\n * const purl2 = new PseudoUrl(/http:\\/\\/www\\.example\\.com\\/pages\\/(\\w|-)+/);\n *\n * if (purl.matches('http://www.example.com/pages/my-awesome-page')) console.log('Match!');\n * ```\n * @category Sources\n */\nexport class PseudoUrl {\n    readonly regex: RegExp;\n\n    /**\n     * @param purl\n     *   A pseudo-URL string or a regular expression object.\n     *   Using a `RegExp` instance enables more granular control,\n     *   such as making the matching case-sensitive.\n     */\n    constructor(purl: string | RegExp) {\n        if (purl instanceof RegExp) {\n            this.regex = purl;\n        } else if (typeof purl === 'string') {\n            this.regex = purlToRegExp(purl);\n            log.debug('PURL parsed', { purl, regex: this.regex });\n        } else {\n            const type = Array.isArray(purl) ? 'array' : typeof purl;\n            throw new Error(\n                `Invalid PseudoUrl format, 'string' or 'RegExp' required, got \\`${inspect(purl)}\\` of type '${type}' instead`,\n            );\n        }\n    }\n\n    /**\n     * Determines whether a URL matches this pseudo-URL pattern.\n     */\n    matches(url: string): boolean {\n        return (typeof url as unknown) === 'string' && url.match(this.regex) !== null;\n    }\n}\n\n/**\n * Parses PURL into Regex string.\n */\nexport function purlToRegExp(purl: string): RegExp {\n    const trimmedPurl = purl.trim();\n    if (trimmedPurl.length === 0) throw new Error(`Cannot parse PURL '${trimmedPurl}': it must be an non-empty string`);\n\n    let regex = '^';\n\n    try {\n        let openBrackets = 0;\n        for (let i = 0; i < trimmedPurl.length; i++) {\n            const ch = trimmedPurl.charAt(i);\n\n            if (ch === '[' && ++openBrackets === 1) {\n                // Beginning of '[regex]' section\n                // Enclose regex in () brackets to enforce operator priority\n                regex += '(';\n            } else if (ch === ']' && openBrackets > 0 && --openBrackets === 0) {\n                // End of '[regex]' section\n                regex += ')';\n            } else if (openBrackets > 0) {\n                // Inside '[regex]' section\n                regex += ch;\n            } else {\n                // Outside '[regex]' section, parsing the URL part\n                const code = ch.charCodeAt(0);\n                if ((code >= 48 && code <= 57) || (code >= 65 && code <= 90) || (code >= 97 && code <= 122)) {\n                    // Alphanumeric character => copy it.\n                    regex += ch;\n                } else {\n                    // Special character => escape it\n                    const hex = code < 16 ? `0${code.toString(16)}` : code.toString(16);\n                    regex += `\\\\x${hex}`;\n                }\n            }\n        }\n        regex += '$';\n    } catch (err) {\n        throw new Error(`Cannot parse PURL '${purl}': ${err}`);\n    }\n\n    return new RegExp(regex, 'i');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,uBAAwB;AAExB,iBAAgB;AAmDT,IAAM,aAAN,MAAM,WAAU;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASnB,YAAY,MAAuB;AARnC,wBAAS;AASL,QAAI,gBAAgB,QAAQ;AACxB,WAAK,QAAQ;AAAA,IACjB,WAAW,OAAO,SAAS,UAAU;AACjC,WAAK,QAAQ,aAAa,IAAI;AAC9B,iBAAAA,QAAI,MAAM,eAAe,EAAE,MAAM,OAAO,KAAK,MAAM,CAAC;AAAA,IACxD,OAAO;AACH,YAAM,OAAO,MAAM,QAAQ,IAAI,IAAI,UAAU,OAAO;AACpD,YAAM,IAAI;AAAA,QACN,sEAAkE,0BAAQ,IAAI,CAAC,eAAe,IAAI;AAAA,MACtG;AAAA,IACJ;AAAA,EACJ;AAAA;AAAA;AAAA;AAAA,EAKA,QAAQ,KAAsB;AAC1B,WAAQ,OAAO,QAAoB,YAAY,IAAI,MAAM,KAAK,KAAK,MAAM;AAAA,EAC7E;AACJ;AA7BuB;AAAhB,IAAM,YAAN;AAkCA,SAAS,aAAa,MAAsB;AAC/C,QAAM,cAAc,KAAK,KAAK;AAC9B,MAAI,YAAY,WAAW,EAAG,OAAM,IAAI,MAAM,sBAAsB,WAAW,mCAAmC;AAElH,MAAI,QAAQ;AAEZ,MAAI;AACA,QAAI,eAAe;AACnB,aAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AACzC,YAAM,KAAK,YAAY,OAAO,CAAC;AAE/B,UAAI,OAAO,OAAO,EAAE,iBAAiB,GAAG;AAGpC,iBAAS;AAAA,MACb,WAAW,OAAO,OAAO,eAAe,KAAK,EAAE,iBAAiB,GAAG;AAE/D,iBAAS;AAAA,MACb,WAAW,eAAe,GAAG;AAEzB,iBAAS;AAAA,MACb,OAAO;AAEH,cAAM,OAAO,GAAG,WAAW,CAAC;AAC5B,YAAK,QAAQ,MAAM,QAAQ,MAAQ,QAAQ,MAAM,QAAQ,MAAQ,QAAQ,MAAM,QAAQ,KAAM;AAEzF,mBAAS;AAAA,QACb,OAAO;AAEH,gBAAM,MAAM,OAAO,KAAK,IAAI,KAAK,SAAS,EAAE,CAAC,KAAK,KAAK,SAAS,EAAE;AAClE,mBAAS,MAAM,GAAG;AAAA,QACtB;AAAA,MACJ;AAAA,IACJ;AACA,aAAS;AAAA,EACb,SAAS,KAAK;AACV,UAAM,IAAI,MAAM,sBAAsB,IAAI,MAAM,GAAG,EAAE;AAAA,EACzD;AAEA,SAAO,IAAI,OAAO,OAAO,GAAG;AAChC;AAxCgB;","names":["log"]}