/** * WordPress dependencies */ import { getPhrasingContentSchema, removeInvalidHTML } from '@wordpress/dom'; /** * Internal dependencies */ import { htmlToBlocks } from './html-to-blocks'; import { hasBlockSupport } from '../registration'; import { getBlockInnerHTML } from '../serializer'; import parse from '../parser'; import normaliseBlocks from './normalise-blocks'; import specialCommentConverter from './special-comment-converter'; import commentRemover from './comment-remover'; import isInlineContent from './is-inline-content'; import phrasingContentReducer from './phrasing-content-reducer'; import headRemover from './head-remover'; import msListConverter from './ms-list-converter'; import msListIgnore from './ms-list-ignore'; import listReducer from './list-reducer'; import imageCorrector from './image-corrector'; import blockquoteNormaliser from './blockquote-normaliser'; import divNormaliser from './div-normaliser'; import figureContentReducer from './figure-content-reducer'; import shortcodeConverter from './shortcode-converter'; import markdownConverter from './markdown-converter'; import iframeRemover from './iframe-remover'; import googleDocsUIDRemover from './google-docs-uid-remover'; import htmlFormattingRemover from './html-formatting-remover'; import brRemover from './br-remover'; import { deepFilterHTML, isPlain, getBlockContentSchema } from './utils'; import emptyParagraphRemover from './empty-paragraph-remover'; import slackParagraphCorrector from './slack-paragraph-corrector'; import isLatexMathMode from './latex-to-math'; import { createBlock } from '../factory'; import headingTransformer from './heading-transformer'; import type { Block } from '../../types'; const log = ( ...args: unknown[] ): void => window?.console?.log?.( ...args ); /** * Filters HTML to only contain phrasing content. * * @param HTML The HTML to filter. * * @return HTML only containing phrasing content. */ function filterInlineHTML( HTML: string ): string { HTML = deepFilterHTML( HTML, [ headRemover, googleDocsUIDRemover, msListIgnore, phrasingContentReducer, commentRemover, ] ); HTML = removeInvalidHTML( HTML, getPhrasingContentSchema( 'paste' ) as Parameters< typeof removeInvalidHTML >[ 1 ], true ); HTML = deepFilterHTML( HTML, [ htmlFormattingRemover, brRemover ] ); // Allows us to ask for this information when we get a report. log( 'Processed inline HTML:\n\n', HTML ); return HTML; } /** * Converts an HTML string to known blocks. Strips everything else. * * @param options * @param options.HTML The HTML to convert. * @param options.plainText Plain text version. * @param options.mode Handle content as blocks or inline content. * * 'AUTO': Decide based on the content passed. * * 'INLINE': Always handle as inline content, and return string. * * 'BLOCKS': Always handle as blocks, and return array of blocks. * @param options.tagName The tag into which content will be inserted. * * @return A list of blocks or a string, depending on `handlerMode`. */ export function pasteHandler( { HTML = '', plainText = '', mode = 'AUTO', tagName, }: { HTML?: string; plainText?: string; mode?: 'AUTO' | 'INLINE' | 'BLOCKS'; tagName?: string; } ): Block[] | string { // Allows us to ask for this information when we get a report. log( 'Received HTML (pasteHandler):\n\n', HTML ); log( 'Received plain text (pasteHandler):\n\n', plainText ); // First of all, strip any meta tags. HTML = HTML.replace( /]+>/g, '' ); // Strip Windows markers. HTML = HTML.replace( /^\s*]*>\s*]*>(?:\s*)?/i, '' ); HTML = HTML.replace( /(?:\s*)?<\/body>\s*<\/html>\s*$/i, '' ); // If we detect block delimiters in HTML, parse entirely as blocks. if ( mode !== 'INLINE' ) { // Check plain text if there is no HTML. const content = HTML ? HTML : plainText; if ( content.indexOf( '