// import from node_modules import express from 'express'; import cors from 'cors'; import puppeteer from 'puppeteer'; import type { Browser } from 'puppeteer'; import cookieParser from 'cookie-parser'; import nodeFetch from 'node-fetch'; import type { ClientRequest, IncomingMessage } from 'http'; import type { Request, Response } from 'express'; import { createLogger, format, transports } from 'winston'; import { JSDOM, VirtualConsole } from 'jsdom'; import { Gunzip, createGunzip } from 'zlib'; // import from data types import type { PageDimensions, ProxyRequestOptions, PuppeteerOptions, ServerConfigurationOptions, Viewport } from './utils/types.js'; // import from utils import { isValidURL } from './utils/isValidURL'; import { getHostPortSSL } from './utils/getHostPortSSL'; import { isURLAbsolute, getCorrectHref } from './utils/isURLAbsolute'; import { getProxyFailedPage } from './utils/proxyFailedPage'; // import raw from assets // @ts-ignore import debounceJS from './assets/debounceJS.js'; // @ts-ignore import shared from './assets/shared.js'; // @ts-ignore import sendTextData from './assets/getTextData.js'; // @ts-ignore import blockNavigation from './assets/blockNavigation.js'; // @ts-ignore import linkPreview from './assets/linkPreview.js'; // @ts-ignore import blockNavigationStyle from './assets/blockNavigation.css'; // @ts-ignore import linkPreviewStyle from './assets/linkPreview.css'; /** * This is a proxy solution to use with WebViewer-HTML that allows loading external HTML web pages so that HTML pages can be annotated. * See the npm package on {@link https://www.npmjs.com/package/@pdftron/webviewer-html-proxy-server @pdftron/webviewer-html-proxy-server} for more information. * @module @pdftron/webviewer-html-proxy-server */ /** * Initializes the proxy server to load external HTML pages. * @static * @alias module:@pdftron/webviewer-html-proxy-server.createServer * @param {object} options - The options object containing SERVER_ROOT and PORT. 
* @param {string} options.SERVER_ROOT * Start the server on the specified host and port * @param {number} options.PORT * Start the server on the specified host and port * @param {cors.CorsOptions} [options.CORS_OPTIONS] * An object to configure CORS. See {@link https://expressjs.com/en/resources/middleware/cors.html} * @param {express.CookieOptions} [options.COOKIE_SETTING] * An object to configure COOKIE. See {@link https://expressjs.com/en/api.html#res.cookie} * @param {boolean} [options.ALLOW_POTENTIALLY_UNSAFE_URL] * Boolean containing value to disable URL validation. Setting this to true will override ALLOW_HTTP_PROXY. * @param {boolean} [options.ALLOW_HTTP_PROXY] * Boolean containing value to allow loading localhost files and for unsecured HTTP websites to be proxied. * @returns {void} * @example * const HTMLProxyServer = require('@pdftron/webviewer-html-proxy-server'); HTMLProxyServer.createServer({ SERVER_ROOT: `http://localhost`, PORT: 3100 }); */ const createServer = ({ SERVER_ROOT, PORT, CORS_OPTIONS = { origin: `${SERVER_ROOT}:3000`, credentials: true }, COOKIE_SETTING = {}, ALLOW_POTENTIALLY_UNSAFE_URL = false, ALLOW_HTTP_PROXY = true }: ServerConfigurationOptions): void => { const { align, colorize, combine, printf, timestamp } = format; const logger = createLogger({ format: combine( timestamp({ format: () => { return new Date().toLocaleString('en-US', { timeZone: 'America/Vancouver', month: 'short', day: '2-digit', year: 'numeric', hour: '2-digit', minute: '2-digit', second: '2-digit', }); } }), align(), printf( ({ level, message, timestamp }) => `[${timestamp}] ${level}: ${message}` ), colorize({ all: true }), ), transports: [ new transports.Console({ format: combine( colorize() ), }) ] }); if (ALLOW_POTENTIALLY_UNSAFE_URL) { logger.warn('*** URL validation is now disabled. Beware of phishing attacks.'); } else if (ALLOW_HTTP_PROXY) { logger.warn('*** Unsecured HTTP websites can now be proxied. Beware of ssrf attacks. 
See more here https://brightsec.com/blog/ssrf-server-side-request-forgery/'); } const app = express(); app.use(cookieParser()); app.use(cors(CORS_OPTIONS)); const PATH = `${SERVER_ROOT}:${PORT}`; const defaultViewport: Viewport = { width: 1440, height: 770 }; const puppeteerOptions: PuppeteerOptions = { product: 'chrome', defaultViewport, headless: true, ignoreHTTPSErrors: false, // whether to ignore HTTPS errors during navigation }; const defaultViewportHeightForVH = 1050; const regexForVhValue = /(\d+?)vh/g; app.get('/pdftron-proxy', async (req: Request, res: Response) => { // this is the url retrieved from the input const url = `${req.query.url}`; // ****** first check for malicious URLs if (!isValidURL(url, ALLOW_HTTP_PROXY, ALLOW_POTENTIALLY_UNSAFE_URL)) { res.status(400).send({ errorMessage: 'Please enter a valid URL and try again.' }); } else { // ****** second check for puppeteer being able to goto url let browser: Browser; try { browser = await puppeteer.launch(puppeteerOptions); const page = await browser.newPage(); // page.on('console', msg => console.log('PAGE LOG:', msg.text())); const customHeaders = req.headers.customheaders; if (customHeaders) { const customHeadersObject = JSON.parse(`${customHeaders}`); await page.setExtraHTTPHeaders(customHeadersObject); } const pageHTTPResponse = await page.goto(url, { // use 'domcontentloaded' https://github.com/puppeteer/puppeteer/issues/1666 waitUntil: 'domcontentloaded', // defaults to load }); // https://github.com/puppeteer/puppeteer/issues/2479 pageHTTPResponse could be null const validUrl: string = pageHTTPResponse?.url() || url; // check again if puppeteer's validUrl will pass the test if (validUrl !== url && !isValidURL(validUrl, ALLOW_HTTP_PROXY, ALLOW_POTENTIALLY_UNSAFE_URL)) { res.status(400).send({ errorMessage: 'Please enter a valid URL and try again.' 
}); } else { logger.info(`********** NEW REQUEST: ${validUrl}`); // cookie will only be set when res is sent succesfully const oneHour: number = 1000 * 60 * 60; res.cookie('pdftron_proxy_sid', validUrl, { ...COOKIE_SETTING, maxAge: oneHour }); if (customHeaders) { res.cookie('pdftron_proxy_headers', `${customHeaders}`, { ...COOKIE_SETTING, maxAge: oneHour }); } res.status(200).send({ validUrl }); } } catch (err) { logger.error(`/pdftron-proxy ${url}`, err); res.status(400).send({ errorMessage: 'Please enter a valid URL and try again.' }); } finally { try { await browser?.close(); } catch (err) { logger.error(`/pdftron-proxy browser.close ${url}`, err); } } } }); // need to be placed before app.use('/'); app.get('/pdftron-download', async (req: Request, res: Response) => { const url = `${req.query.url}`; if (!isValidURL(url, ALLOW_HTTP_PROXY, ALLOW_POTENTIALLY_UNSAFE_URL)) { res.status(400).send({ errorMessage: 'Please enter a valid URL and try again.' }); } else { logger.info(`********** DOWNLOAD: ${url}`); let browser: Browser; try { browser = await puppeteer.launch(puppeteerOptions); const page = await browser.newPage(); await page.goto(url, { waitUntil: 'domcontentloaded' }); await page.waitForTimeout(2000); // Get the "viewport" of the page, as reported by the page. const pageDimensions: PageDimensions = await page.evaluate(() => { let sum = 0; // for some web pages, and
have height: 100% // sum up the children's height for an accurate page height document.body.childNodes.forEach((el: Element) => { if (el.nodeType === Node.ELEMENT_NODE) { const style = window.getComputedStyle(el); // filter hidden/collapsible elements if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0' || style.position === 'fixed' || style.position === 'absolute') { return; } // some elements have undefined clientHeight // favor scrollHeight since clientHeight does not include padding if (!isNaN(el.scrollHeight) && !isNaN(el.clientHeight)) { sum += (el.clientHeight > 0 ? (el.scrollHeight || el.clientHeight) : el.clientHeight); } } }); const bodyHeight = document.body.scrollHeight || document.body.clientHeight || 0; const pageHeight = bodyHeight > 0 && bodyHeight > sum ? bodyHeight : sum; return { width: document.body.scrollWidth || document.body.clientWidth || 1440, // sum can be less than defaultViewport height: pageHeight > 770 ? pageHeight : 770, }; }); const buffer = await page.screenshot({ type: 'png', fullPage: true }); res.setHeader('Cache-Control', ['no-cache', 'no-store', 'must-revalidate']); res.status(200).send({ buffer, pageDimensions }); } catch (err) { logger.error(`/pdftron-download ${url}`, err); res.status(400).send({ errorMessage: 'Error taking screenshot from puppeteer' }); } finally { try { await browser?.close(); } catch (err) { logger.error(`/pdftron-download browser.close ${url}`, err); } } } }); app.get('/pdftron-link-preview', async (req: Request, res: Response) => { const linkToPreview = `${req.query.url}`; try { const page = await nodeFetch(linkToPreview); const virtualConsole = new VirtualConsole(); virtualConsole.on('error', () => { // No-op to skip console errors. 
https://github.com/jsdom/jsdom/issues/2230 }); const virtualDOM = new JSDOM(await page.text(), { virtualConsole }); const { window } = virtualDOM; const { document } = window; const pageTitle: string = document.title; const faviconValidURLs: string[] = []; const faviconDataURLs: string[] = []; const getAllFaviconURLs = (selectors: string) => { document.querySelectorAll(selectors).forEach((el) => { if (el.getAttribute('href')) { // if favicon is a data URL, new URL() will return the same value const { href: absoluteFaviconURL } = new URL(el.getAttribute('href'), linkToPreview); // separate valid faviconURL and data faviconURL if (isURLAbsolute(absoluteFaviconURL)) { faviconValidURLs.push(absoluteFaviconURL); } else { faviconDataURLs.push(absoluteFaviconURL); } } }); }; // prioritize [rel="icon"] over [rel="shortcut icon"]; getAllFaviconURLs('link[rel="icon"]'); getAllFaviconURLs('link[rel="shortcut icon"]'); const faviconUrl = faviconValidURLs[0] || faviconDataURLs[0] || ''; const metaSelectors: NodeListOf