import 'form-request-submit-polyfill'
import { camelCase, lowerCase, snakeCase } from 'es-toolkit'
import { domReady } from '../lib/dom-ready'
import { PageDefault, pageDefaults } from './page/page-info'

// Teardown for the currently installed handlers, or `undefined` when nothing is installed.
let unregisterHandlers: (() => void) | undefined

// Substrings that mark a field (by name, id or label) as sensitive; such fields are never collected.
const sensitiveNames = [
  'captcha',
  'card',
  'cc',
  'ccn',
  'credit',
  'cvv',
  'password',
  'payable',
  'pin',
  'pw',
  'receivable',
  'routing',
  'secret',
  'security',
  'ssn',
  'token',
  'account'
].flatMap((name) => [name.toLowerCase(), snakeCase(name), lowerCase(name)])

// Field names (after camelCase normalization) that are promoted to traits.
const traitNames = [
  'email',
  'name',
  'phone',
  'phoneNumber',
  'firstName',
  'lastName',
  'fullName',
  'title',
  'username',
  'website',
  'domain',
  'company'
]

export type InputElement = HTMLInputElement | HTMLSelectElement | HTMLTextAreaElement

/**
 * Find the nearest ancestor (starting with the element itself) that matches the predicate.
 * Stops at the parent form tag: when the walk reaches a `FORM` element, that element is
 * returned without being tested, so callers must check for it.
 *
 * @returns the matching element, the enclosing `FORM`, or `null` when the walk runs out of parents
 */
function findAncestor(element: HTMLElement | null, callback: (e: HTMLElement) => boolean) {
  let current: HTMLElement | null = element

  while (current) {
    if (callback(current)) {
      break
    }

    current = current.parentElement

    if (!current || current.tagName === 'FORM') {
      break
    }
  }

  return current
}

/**
 * Returns the trimmed `innerText` of an element, falling back to its trimmed `textContent`
 * when `innerText` is missing or empty.
 */
function extractText(element?: HTMLElement | null) {
  return element?.innerText?.trim() || element?.textContent?.trim()
}

/**
 * Find the nearest label to a given form element within the form
 * 1. searches for associated `labels`
 * 2. searches sibling element labels
 * 3. searches nearest parent that has a label and no other input children
 *
 * @param element the form control to find a label for
 * @param others all controls of the form, used to reject ancestors that wrap other inputs
 * @returns the label text in camelCase, or an empty string when no label is found
 */
export function getNearestLabel(element: InputElement, others: InputElement[]) {
  let label: string | undefined

  if (element.labels && element.labels.length) {
    label = extractText(element.labels[0])
  }

  if (!label) {
    const siblingLabels = [element.previousElementSibling, element.nextElementSibling].filter(
      (e) => e?.tagName === 'LABEL' && extractText(e as HTMLLabelElement)
    ) as HTMLLabelElement[]

    if (siblingLabels.length) {
      label = extractText(siblingLabels[0])
    }
  }

  if (!label) {
    const ancestor = findAncestor(element, (e) => {
      const hasLabel = e.querySelectorAll('label').length > 0
      const noOtherInputs = others.every((other) => other === element || !e.contains(other))
      return hasLabel && noOtherInputs
    })

    // findAncestor returns the FORM itself when nothing closer matched; ignore that case
    if (ancestor && ancestor.tagName !== 'FORM') {
      label = extractText(ancestor.querySelector('label'))
    }
  }

  // normalize label, remove trailing `*` and other special chars and downcase
  return camelCase(label || '')
}

/**
 * Builds a fallback identifier for a control that has no `id`, from its tag name,
 * position in the form, type, id and class name.
 */
function generateId(element: InputElement, index: number) {
  const tag = element.tagName.toLowerCase()
  const type = element.type
  const id = element.id ? `#${element.id}` : ''
  const className = element.className ? `.${element.className}` : ''
  return `${tag}_${index}_${[type, id, className].filter(Boolean).join('_')}`
}

interface FormElementData {
  id: string
  label: string
  name: string | undefined
  type: string
  tagName: string
  value: string
}

/**
 * Collects the non-sensitive, non-empty controls of a form.
 *
 * Skipped: controls without a value, anything that is not an input/select/textarea,
 * unchecked checkboxes and radios, password/hidden/submit inputs, and any control whose
 * name, id or label contains one of `sensitiveNames`.
 *
 * @returns a map keyed by the first unused of: control name, label, id
 */
function getFormElementData(form: HTMLFormElement) {
  const data: Record<string, FormElementData> = {}
  const elements = Array.from(form.elements) as InputElement[]

  for (const element of elements) {
    if (!element.value) {
      continue
    }

    // Ignore buttons and other elements we dont care about
    if (!['INPUT', 'SELECT', 'TEXTAREA'].includes(element.tagName)) {
      continue
    }

    // Ignore unchecked checkboxes
    if (['checkbox', 'radio'].includes(element.type) && !(element as HTMLInputElement).checked) {
      continue
    }

    // Ignore sensitive inputs
    if (element.type === 'password') {
      continue
    }

    // Ignore hidden inputs
    if (element.type === 'hidden') {
      continue
    }

    // Ignore submit buttons
    if (element.type === 'submit') {
      continue
    }

    if (element.name && sensitiveNames.some((s) => element.name.toLowerCase().includes(s))) {
      continue
    }

    if (element.id && sensitiveNames.some((s) => element.id.toLowerCase().includes(s))) {
      continue
    }

    const label = getNearestLabel(element, elements)
    if (label && sensitiveNames.some((s) => label.toLowerCase().includes(s))) {
      continue
    }

    const id = element.id || generateId(element, elements.indexOf(element))
    const keys = [element.name, label, id].filter(Boolean)

    // find the first available key
    // does not support multi-inputs with the same name
    for (const key of keys) {
      if (!data[key]) {
        data[key] = {
          id: id,
          label: label,
          name: element.name,
          type: element.type,
          tagName: element.tagName,
          value: element.value
        }
        break
      }
    }
  }

  return data
}

/**
 * Flattens collected field data into a `key -> value` map.
 */
function extractFormData(fields: Record<string, FormElementData>) {
  const formData: { [key: string]: unknown } = {}

  for (const [key, data] of Object.entries(fields)) {
    formData[key] = data.value
  }

  return formData
}

/**
 * Validates an email using native browser validity checks
 * (a detached, required `<input type="email">`).
 */
export function validEmail(value: unknown): boolean {
  const input = document.createElement('input')
  input.type = 'email'
  input.required = true
  input.value = String(value)
  return input.checkValidity()
}

/**
 * Returns the keys of all fields that look like an email field (by type, name, label or id)
 * and whose value passes `validEmail`.
 */
function emailKeys(fields: Record<string, FormElementData>): string[] {
  return Object.entries(fields)
    .filter(([_key, data]) => {
      if (
        data.type === 'email' ||
        data.name?.toLowerCase()?.includes('email') ||
        data.label?.toLowerCase()?.includes('email') ||
        data.id?.toLowerCase()?.includes('email')
      ) {
        return validEmail(data.value)
      }

      return false
    })
    .map(([key]) => key)
}

/**
 * Extracts traits from form data
 *
 * @returns the traits when an email, or both a first and last name, were identified;
 *          otherwise an empty object
 */
function extractTraits(fields: Record<string, FormElementData>) {
  const traits: Record<string, string> = {}
  const emails = emailKeys(fields)
  const hasOneEmail = emails.length === 1

  for (const [key, data] of Object.entries(fields)) {
    // Handle the email first so it is always stored under the lowercase `email` key
    if (hasOneEmail && emails[0] === key) {
      traits.email = data.value
      continue
    }

    // normalize potential trait names into camelCase
    // weed out labels like `Your *` or `Work *` or `Business *` or `Job *`
    const names = [data.name, data.label, data.id].filter(Boolean).map((n) => {
      return camelCase(lowerCase(n || '').replace(/^(your|work|business|job)(\s+)/i, ''))
    })

    // find the most semantically relevant name for this field that hasn't been used yet
    const name = names.find((n) => traitNames.includes(n) && !traits[n])

    // ignore email (we already captured it above, any other email values may be invalid)
    if (name && name !== 'email' && data.value) {
      traits[name] = data.value
    }
  }

  // only return traits if there is an email identified,
  // otherwise generic field names are hard to interpret as traits (e.g. `name`, `title`)
  if (traits.email || (traits.firstName && traits.lastName)) {
    return traits
  } else {
    return {}
  }
}

interface FormDetails {
  action?: string
  name?: string | null
  method?: string
  formData: { [key: string]: unknown }
  traits: { [key: string]: unknown }
  context: {
    page: PageDefault
    selector?: string | null
  }
}

type FormDataCallback = (details: FormDetails) => Promise<void> | void

export interface IgnoredForm {
  identifier_type: 'path'
  identifier: string
}

export interface FormCollectionOptions {
  ignoredForms?: IgnoredForm[]
}

/**
 * Starts collecting form submissions on the document and on any iframe whose
 * `contentDocument` is accessible, and hooks `HTMLFormElement.prototype.submit`.
 * Any handlers installed by a previous call are removed first.
 *
 * @param callback invoked with the collected details of each submitted form; errors it
 *                 throws are swallowed so they never affect the submission
 * @param options  `ignoredForms` lists page paths on which nothing is collected
 * @returns a function that removes the handlers and restores the original `submit`
 */
export function collectFormSubmissions(callback: FormDataCallback, options: FormCollectionOptions = {}) {
  // Unregister any previous event handlers
  stopCollectingForms()

  const handleSubmit = async (form: HTMLFormElement) => {
    // `instanceof` fails for forms that live in another document (iframes), hence the tagName check
    const isFormElement = form instanceof HTMLFormElement
    const validForm = isFormElement || form?.tagName === 'FORM'

    // Validate before touching element APIs so a non-form target cannot throw here
    if (!validForm) {
      return
    }

    const isOff = form.getAttribute('data-koala-collect') === 'off'
    if (isOff) {
      return
    }

    const page = pageDefaults()
    const ignoredForms = options.ignoredForms || []
    if (ignoredForms.some((f) => f.identifier_type === 'path' && f.identifier === page.path)) {
      return
    }

    const fields = getFormElementData(form)
    const formData = extractFormData(fields)
    const traits = extractTraits(fields)
    const selector = form.getAttribute('data-koala-selector') || form.getAttribute('id') || form.className

    try {
      await callback({
        context: {
          page,
          selector
        },
        // If a <form> element contains an element named `name` then that element overrides
        // the form.name property, so that you can't access it.
        // so use `getAttribute` instead. :|
        name: form.getAttribute('name') || form.id,
        method: form.method,
        action: form.action,
        formData,
        traits
      })
    } catch (e) {
      // do nothing: collection is best-effort and must never break the page
    }
  }

  // Collects all form data, except password fields,
  // without modifying the submission event.
  const onSubmit = async (event: SubmitEvent) => {
    // `currentTarget` is only set while the event is being dispatched; it is null after any
    // `await`, so it has to be captured synchronously to be able to re-arm the listener.
    const doc = event.currentTarget as Document | null
    const rearm = () => {
      // Adding an identical listener twice is a no-op, so this is safe to call repeatedly
      doc?.addEventListener('submit', onSubmit, { capture: true, once: true })
    }

    try {
      const form = event.target as HTMLFormElement

      if (typeof form.requestSubmit === 'function') {
        event.preventDefault()
        event.stopPropagation()

        // Prefer the control that actually triggered the submission so that its name/value
        // and form* overrides are preserved; fall back to the first submit button.
        const submitter =
          event.submitter ?? form.querySelector<HTMLElement>('button[type=submit], input[type=submit]')

        try {
          await handleSubmit(form)
        } catch (e) {
          // The submission was cancelled above, so a collection failure must not
          // prevent the re-submit below.
        }

        setTimeout(() => {
          try {
            if (submitter) {
              form.requestSubmit(submitter)
            } else {
              form.requestSubmit()
            }
          } catch (e) {
            // requestSubmit rejects a submitter that is not a submit button owned by this
            // form; retry without one rather than dropping the submission.
            try {
              form.requestSubmit()
            } catch (err) {
              // do nothing
            }
          } finally {
            // Add again after dispatching the bubbling submit event, just in case
            rearm()
          }
        }, 0)
      } else {
        await handleSubmit(form)

        // Add again after dispatching the bubbling submit event, just in case
        rearm()
      }
    } catch (e) {
      // Keep collecting future submissions even if this one failed
      rearm()
    }
  }

  document.addEventListener('submit', onSubmit, { capture: true, once: true })

  // Collects form data from iframes on the page
  const iframes = document.querySelectorAll('iframe')
  iframes.forEach((iframe) => {
    const doc = iframe.contentDocument
    if (!doc) {
      return
    }

    domReady(() => {
      doc.addEventListener('submit', onSubmit, { capture: true, once: true })
    }, doc)
  })

  // Per specification, SubmitEvent is not triggered when calling form.submit().
  // Hook the method to call the handler in that case before calling the original.
  const ogSubmit = HTMLFormElement.prototype.submit
  HTMLFormElement.prototype.submit = async function () {
    try {
      await handleSubmit(this)
    } catch (e) {
      // do nothing
    }

    ogSubmit.call(this)
  }

  // Fn to remove previous event handlers if this gets called multiple times
  unregisterHandlers = () => {
    document.removeEventListener('submit', onSubmit, { capture: true })

    iframes.forEach((iframe) => {
      const doc = iframe.contentDocument
      if (!doc) {
        return
      }

      domReady(() => {
        doc.removeEventListener('submit', onSubmit, { capture: true })
      }, doc)
    })

    HTMLFormElement.prototype.submit = ogSubmit
  }

  return unregisterHandlers
}

/**
 * Removes the handlers installed by the most recent `collectFormSubmissions` call, if any.
 * Calling it again without a new `collectFormSubmissions` call is a no-op.
 */
export function stopCollectingForms() {
  const unregister = unregisterHandlers
  // Clear first so a repeated call does not re-run the same teardown
  unregisterHandlers = undefined

  if (unregister) {
    unregister()
  }
}