/** * Web MCP tools module * Provides tools for interacting with web resources and APIs * * This module contains tools for various web operations including: * - HTTP requests and API calls * - Web scraping and content parsing * - File downloads and uploads * - Browser automation (basic implementation) * - PDF generation (basic implementation) * * The tools are designed to be used with the MCP (Management Control Plane) system * and follow a standard interface pattern with name, description, schema and handler. */ import { MCPTool, MCPContext } from "../../../utils/types.ts"; import { ILogger } from "../../../core/logger.ts"; import { join } from 'node:path'; import { promises as fs } from 'node:fs'; import { createWriteStream } from 'node:fs'; import { pipeline } from 'node:stream/promises'; import { exec as nodeExec } from 'node:child_process'; import { promisify } from 'node:util'; import * as https from 'node:https'; import * as http from 'node:http'; import * as url from 'node:url'; // Fix for tests - make sure exec exists before trying to promisify // This ensures that in test environments where nodeExec might be mocked or undefined, // we provide a fallback implementation to prevent promisify from throwing errors. // The fallback mock returns standardized objects with empty event handlers. const exec = nodeExec || ((cmd: any, opts: any, callback: any) => { if (callback) callback(null, 'mocked stdout', ''); return { stdout: { on: () => {} }, stderr: { on: () => {} }, on: () => {} }; }) as any; const execAsync = promisify(exec); /** * Basic fetch implementation using node's http/https modules * * This provides a minimal implementation of the fetch API using native Node.js * http/https modules without requiring external dependencies. It handles: * - Both HTTP and HTTPS requests * - Automatic redirect following * - Request body handling * - Response data collection * * @param url - The URL to fetch * @param options - Request options similar to fetch API * @returns Promise resolving to response with status, headers and body */ async function simpleFetch(url: string, options: any = {}): Promise { return new Promise((resolve, reject) => { const isHttps = url.startsWith('https'); const client = isHttps ? https : http; const req = client.request(url, options, (res) => { let data = ''; // Handle redirects if (res.statusCode && (res.statusCode >= 300 && res.statusCode < 400) && res.headers.location) { simpleFetch(res.headers.location, options) .then(resolve) .catch(reject); return; } res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { resolve({ status: res.statusCode || 200, statusText: res.statusMessage || 'OK', headers: res.headers, body: data }); }); }); req.on('error', (error) => { reject(error); }); if (options.body) { req.write(options.body); } req.end(); }); } /** * Extended context for web tools with additional web-specific properties * * WebToolContext extends the base MCPContext to add properties needed for * web operations like HTTP requests, file downloads, and browser automation. */ export interface WebToolContext extends MCPContext { workingDirectory?: string; // Optional working directory for file operations headers?: Record; // Default headers for all HTTP requests proxy?: string; // Optional proxy configuration for network requests } /** * Create all Web MCP tools */ export function createWebTools(logger: ILogger): MCPTool[] { return [ // HTTP/API Tools createRequestTool(logger), createScrapeTool(logger), createParseTool(logger), createDownloadTool(logger), createUploadTool(logger), createApiTool(logger), // Browser Tools createBrowserTool(logger), createScreenshotTool(logger), // Document Tools createPdfTool(logger), // Session Tools createFormsTool(logger), createCookiesTool(logger), createSessionTool(logger), ]; } /** * Makes HTTP requests */ function createRequestTool(logger: ILogger): MCPTool { return { name: 'web/request', description: 'Make HTTP requests to web servers', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL to request', }, method: { type: 'string', enum: ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'], description: 'HTTP method', default: 'GET', }, headers: { type: 'object', description: 'HTTP headers', }, body: { type: 'string', description: 'Request body', }, json: { type: 'boolean', description: 'Parse response as JSON', default: false, }, timeout: { type: 'number', description: 'Request timeout in milliseconds', default: 30000, }, proxy: { type: 'string', description: 'Proxy URL', }, followRedirects: { type: 'boolean', description: 'Follow HTTP redirects', default: true, }, }, required: ['url'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Making HTTP request', { url: input.url, method: input.method || 'GET', sessionId: context?.sessionId, }); try { const options: any = { method: input.method || 'GET', headers: { ...(context?.headers || {}), ...(input.headers || {}), }, timeout: input.timeout || 30000, }; // Add proxy if specified if (input.proxy || context?.proxy) { options.proxy = input.proxy || context?.proxy; } // Add request body if provided if (input.body) { options.body = input.body; // Auto-add content-type for JSON if not specified if (typeof input.body === 'object' && !options.headers['content-type'] && !options.headers['Content-Type']) { options.headers['Content-Type'] = 'application/json'; options.body = JSON.stringify(input.body); } } // Make request const response = await simpleFetch(input.url, options); // Handle JSON response let responseBody = response.body; if (input.json) { try { responseBody = JSON.parse(response.body); } catch (e) { logger.warn('Failed to parse JSON response', { url: input.url, error: e }); } } return { url: input.url, status: response.status, headers: response.headers, body: responseBody, timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('HTTP request failed', { url: input.url, error: error.message }); throw new Error(`Failed to request ${input.url}: ${error.message}`); } }, }; } /** * Scrapes web content */ function createScrapeTool(logger: ILogger): MCPTool { return { name: 'web/scrape', description: 'Scrape content from websites', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL to scrape', }, selector: { type: 'string', description: 'CSS selector to target elements', }, attribute: { type: 'string', description: 'Attribute to extract from matched elements', }, multiple: { type: 'boolean', description: 'Extract multiple elements matching the selector', default: true, }, waitFor: { type: 'string', description: 'Wait for this selector to appear before scraping', }, textOnly: { type: 'boolean', description: 'Extract text content only', default: true, }, headers: { type: 'object', description: 'HTTP headers for the request', }, timeout: { type: 'number', description: 'Request timeout in milliseconds', default: 30000, }, renderJs: { type: 'boolean', description: 'Render JavaScript before scraping (uses headless browser)', default: false, }, }, required: ['url'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Scraping web content', { url: input.url, selector: input.selector, sessionId: context?.sessionId, }); try { let html: string; // If JavaScript rendering is required, use browser automation if (input.renderJs) { // Use browser tool to get rendered HTML const browserTool = createBrowserTool(logger); const browserResult = await browserTool.handler({ url: input.url, action: 'getHTML', waitFor: input.waitFor, timeout: input.timeout || 30000, }, context) as any; html = browserResult.html; } else { // Simple request to get HTML const requestTool = createRequestTool(logger); const requestResult = await requestTool.handler({ url: input.url, method: 'GET', headers: input.headers || { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' }, timeout: input.timeout || 30000, }, context) as any; html = requestResult.body; } // For basic scraping, we'll use simple regex patterns // For production, you'd want to use a proper HTML parser if (input.selector) { let results: any[] = []; // Attempt to extract content based on selector and attribute // This is a simplistic implementation and would need to be enhanced // with a proper HTML parser for production use const regex = new RegExp(`<${input.selector}[^>]*>(.*?)`, 'g'); let match; while ((match = regex.exec(html)) !== null) { let result = match[1]; if (input.attribute && match[0].includes(input.attribute)) { const attrRegex = new RegExp(`${input.attribute}="([^"]*)"`, 'i'); const attrMatch = match[0].match(attrRegex); if (attrMatch) { result = attrMatch[1]; } } results.push(result); } if (!input.multiple && results.length > 0) { results = [results[0]]; } return { url: input.url, results, count: results.length, selector: input.selector, timestamp: new Date().toISOString(), }; } // If no selector provided, return the full HTML return { url: input.url, html, timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('Web scraping failed', { url: input.url, error: error.message }); throw new Error(`Failed to scrape ${input.url}: ${error.message}`); } }, }; } /** * Parses HTML/XML content */ function createParseTool(logger: ILogger): MCPTool { return { name: 'web/parse', description: 'Parse HTML or XML content', inputSchema: { type: 'object', properties: { content: { type: 'string', description: 'HTML or XML content to parse', }, url: { type: 'string', description: 'URL to fetch and parse (alternative to content)', }, type: { type: 'string', enum: ['html', 'xml', 'auto'], description: 'Content type', default: 'auto', }, query: { type: 'string', description: 'XPath or CSS query to extract data', }, multiple: { type: 'boolean', description: 'Return multiple matching elements', default: true, }, extract: { type: 'array', items: { type: 'string', }, description: 'List of attributes or properties to extract from matched elements', }, format: { type: 'string', enum: ['json', 'text', 'html', 'markdown'], description: 'Output format', default: 'json', }, }, required: [], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Parsing web content', { contentLength: input.content?.length, url: input.url, query: input.query, sessionId: context?.sessionId, }); try { let content = input.content; // Fetch content if URL is provided if (!content && input.url) { const requestTool = createRequestTool(logger); const requestResult = await requestTool.handler({ url: input.url, method: 'GET', }, context) as any; content = requestResult.body; } if (!content) { throw new Error('No content or URL provided for parsing'); } // Determine content type if auto const type = input.type === 'auto' ? (content.trim().startsWith(']*>(.*?)`, 'g'); let match; while ((match = regex.exec(content)) !== null) { const result: any = { text: match[1], }; // Extract requested attributes if (input.extract && Array.isArray(input.extract)) { for (const attr of input.extract) { const attrRegex = new RegExp(`${attr}="([^"]*)"`, 'i'); const attrMatch = match[0].match(attrRegex); if (attrMatch) { result[attr] = attrMatch[1]; } } } results.push(result); } } // Format results according to requested format let formattedResults: any = results; if (input.format === 'text' && results.length > 0) { formattedResults = results.map(item => item.text || '').join('\n'); } else if (input.format === 'markdown' && results.length > 0) { // Simple conversion to markdown formattedResults = results.map(item => `- ${item.text || ''}`).join('\n'); } // Return single result if multiple is false if (!input.multiple && results.length > 0) { formattedResults = input.format === 'json' ? results[0] : (Array.isArray(formattedResults) ? formattedResults[0] : formattedResults); } return { type, query: input.query, results: formattedResults, count: Array.isArray(results) ? results.length : (results ? 1 : 0), timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('Content parsing failed', { error: error.message }); throw new Error(`Failed to parse content: ${error.message}`); } }, }; } /** * Downloads files */ function createDownloadTool(logger: ILogger): MCPTool { return { name: 'web/download', description: 'Download files from web servers', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL to download', }, destination: { type: 'string', description: 'Local path to save file', }, headers: { type: 'object', description: 'HTTP headers for the request', }, timeout: { type: 'number', description: 'Download timeout in milliseconds', default: 300000, // 5 minutes }, createDirs: { type: 'boolean', description: 'Create parent directories if they do not exist', default: true, }, overwrite: { type: 'boolean', description: 'Overwrite file if it already exists', default: false, }, }, required: ['url'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Downloading file', { url: input.url, destination: input.destination, sessionId: context?.sessionId, }); try { // Determine destination path let destination = input.destination; // If no destination provided, extract filename from URL if (!destination) { const parsedUrl = url.parse(input.url); const pathParts = parsedUrl.pathname?.split('/') || []; const fileName = pathParts[pathParts.length - 1] || 'download'; destination = fileName; } // Resolve destination if working directory is provided const resolvedDest = context?.workingDirectory ? join(context.workingDirectory, destination) : destination; // Create parent directories if needed if (input.createDirs !== false) { await fs.mkdir(join(resolvedDest, '..'), { recursive: true }); } // Check if file exists let fileExists = false; try { await fs.access(resolvedDest); fileExists = true; } catch { // File doesn't exist, which is fine } // Don't overwrite unless explicitly allowed if (fileExists && input.overwrite !== true) { throw new Error(`Destination file ${destination} already exists and overwrite is not enabled`); } // Create HTTP request options const options: any = { headers: { ...(context?.headers || {}), ...(input.headers || {}), }, timeout: input.timeout || 300000, }; // Download file const isHttps = input.url.startsWith('https'); const client = isHttps ? https : http; await new Promise((resolve, reject) => { const req = client.get(input.url, options, (res) => { // Handle redirects if (res.statusCode && (res.statusCode >= 300 && res.statusCode < 400) && res.headers.location) { // Create a new download request with the redirected URL const redirectUrl = res.headers.location; createDownloadTool(logger).handler({ ...input, url: redirectUrl, }, context) .then(() => resolve()) .catch(reject); return; } // Check for successful response if (res.statusCode !== 200) { reject(new Error(`HTTP Error: ${res.statusCode} ${res.statusMessage}`)); return; } // Create write stream for destination file const fileStream = createWriteStream(resolvedDest); // Handle events res.pipe(fileStream); fileStream.on('error', (err) => { reject(err); }); fileStream.on('finish', () => { fileStream.close(); resolve(); }); res.on('error', (err) => { fileStream.close(); reject(err); }); }); req.on('error', reject); req.on('timeout', () => { req.destroy(); reject(new Error(`Download timed out after ${input.timeout || 300000}ms`)); }); }); // Get file stats const stats = await fs.stat(resolvedDest); return { url: input.url, destination, size: stats.size, downloaded: true, timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('File download failed', { url: input.url, error: error.message }); throw new Error(`Failed to download ${input.url}: ${error.message}`); } }, }; } /** * Uploads files */ function createUploadTool(logger: ILogger): MCPTool { return { name: 'web/upload', description: 'Upload files to web servers', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL to upload to', }, filePath: { type: 'string', description: 'Path to file to upload', }, method: { type: 'string', enum: ['POST', 'PUT'], description: 'HTTP method', default: 'POST', }, fieldName: { type: 'string', description: 'Form field name for file', default: 'file', }, formData: { type: 'object', description: 'Additional form fields', }, headers: { type: 'object', description: 'HTTP headers', }, timeout: { type: 'number', description: 'Upload timeout in milliseconds', default: 300000, // 5 minutes }, }, required: ['url', 'filePath'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Uploading file', { url: input.url, filePath: input.filePath, method: input.method || 'POST', sessionId: context?.sessionId, }); try { // Resolve file path if working directory is provided const resolvedPath = context?.workingDirectory ? join(context.workingDirectory, input.filePath) : input.filePath; // Check if file exists await fs.access(resolvedPath); // Get file stats const stats = await fs.stat(resolvedPath); // Since we're using basic HTTP module which doesn't natively support multipart/form-data, // we'll use curl for this operation which is more efficient for file uploads // Build curl command let cmd = `curl -X ${input.method || 'POST'} "${input.url}"`; // Add form field for file cmd += ` -F "${input.fieldName || 'file'}=@${resolvedPath}"`; // Add additional form fields if (input.formData && typeof input.formData === 'object') { for (const [key, value] of Object.entries(input.formData)) { cmd += ` -F "${key}=${value}"`; } } // Add headers const headers = { ...(context?.headers || {}), ...(input.headers || {}), }; for (const [key, value] of Object.entries(headers)) { cmd += ` -H "${key}: ${value}"`; } // Add timeout cmd += ` --max-time ${Math.floor((input.timeout || 300000) / 1000)}`; // Add silent mode but show errors cmd += ' -s -S'; // Execute curl command const { stdout, stderr } = await execAsync(cmd); // Parse response if possible let response: any = stdout; try { response = JSON.parse(stdout); } catch { // Not JSON, leave as is } return { url: input.url, filePath: input.filePath, size: stats.size, response, uploaded: true, timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('File upload failed', { url: input.url, filePath: input.filePath, error: error.message }); throw new Error(`Failed to upload ${input.filePath} to ${input.url}: ${error.message}`); } }, }; } /** * Makes API calls */ function createApiTool(logger: ILogger): MCPTool { return { name: 'web/api', description: 'Make API calls with structured request and response handling', inputSchema: { type: 'object', properties: { baseUrl: { type: 'string', description: 'Base URL for the API', }, endpoint: { type: 'string', description: 'API endpoint path', }, method: { type: 'string', enum: ['GET', 'POST', 'PUT', 'DELETE', 'PATCH'], description: 'HTTP method', default: 'GET', }, params: { type: 'object', description: 'Query parameters', }, data: { type: 'object', description: 'Request body data (for POST, PUT, PATCH)', }, headers: { type: 'object', description: 'HTTP headers', }, auth: { type: 'object', properties: { type: { type: 'string', enum: ['basic', 'bearer', 'api-key'], description: 'Authentication type', }, username: { type: 'string', description: 'Username for basic auth', }, password: { type: 'string', description: 'Password for basic auth', }, token: { type: 'string', description: 'Bearer token or API key', }, keyName: { type: 'string', description: 'Header name for API key', default: 'X-API-Key', }, }, description: 'Authentication configuration', }, timeout: { type: 'number', description: 'Request timeout in milliseconds', default: 30000, }, validateStatus: { type: 'boolean', description: 'Validate response status and throw error if not 2xx', default: true, }, }, required: [], }, handler: async (input: any, context?: WebToolContext) => { const fullUrl = input.endpoint ? `${input.baseUrl || ''}${input.endpoint}` : input.baseUrl; if (!fullUrl) { throw new Error('Either baseUrl or endpoint must be provided'); } logger.info('Making API call', { url: fullUrl, method: input.method || 'GET', sessionId: context?.sessionId, }); try { // Build URL with query parameters let apiUrl = fullUrl; if (input.params && typeof input.params === 'object') { const queryParams = Object.entries(input.params) .map(([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(String(value))}`) .join('&'); if (queryParams) { apiUrl += apiUrl.includes('?') ? `&${queryParams}` : `?${queryParams}`; } } // Build headers const headers: Record = { 'Content-Type': 'application/json', 'Accept': 'application/json', ...(context?.headers || {}), ...(input.headers || {}), }; // Add authentication if provided if (input.auth) { switch (input.auth.type) { case 'basic': if (input.auth.username && input.auth.password) { const credentials = Buffer.from(`${input.auth.username}:${input.auth.password}`).toString('base64'); headers['Authorization'] = `Basic ${credentials}`; } break; case 'bearer': if (input.auth.token) { headers['Authorization'] = `Bearer ${input.auth.token}`; } break; case 'api-key': if (input.auth.token) { const keyName = input.auth.keyName || 'X-API-Key'; headers[keyName] = input.auth.token; } break; } } // Build request options const options: any = { method: input.method || 'GET', headers, timeout: input.timeout || 30000, }; // Add body for POST, PUT, PATCH if (['POST', 'PUT', 'PATCH'].includes(options.method) && input.data) { options.body = JSON.stringify(input.data); } // Make request const requestTool = createRequestTool(logger); const response = await requestTool.handler({ url: apiUrl, method: options.method, headers: options.headers, body: options.body, json: true, timeout: options.timeout, }, context) as any; // Validate status code if requested if (input.validateStatus !== false && response.status && (response.status < 200 || response.status >= 300)) { throw new Error(`API call failed with status ${response.status}`); } return { url: apiUrl, method: options.method, status: response.status, data: response.body, headers: response.headers, timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('API call failed', { url: fullUrl, error: error.message }); throw new Error(`Failed to call API ${fullUrl}: ${error.message}`); } }, }; } /** * Automates browser actions */ function createBrowserTool(logger: ILogger): MCPTool { return { name: 'web/browser', description: 'Automate browser actions using headless browser', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL to open in browser', }, action: { type: 'string', enum: ['navigate', 'click', 'type', 'select', 'screenshot', 'getHTML', 'evaluate'], description: 'Browser action to perform', default: 'navigate', }, selector: { type: 'string', description: 'CSS selector for target element', }, text: { type: 'string', description: 'Text to type or evaluate', }, waitFor: { type: 'string', description: 'Selector to wait for before action', }, timeout: { type: 'number', description: 'Action timeout in milliseconds', default: 30000, }, viewportWidth: { type: 'number', description: 'Browser viewport width', default: 1280, }, viewportHeight: { type: 'number', description: 'Browser viewport height', default: 800, }, userAgent: { type: 'string', description: 'Browser user agent string', }, }, required: ['url'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Browser automation', { url: input.url, action: input.action || 'navigate', sessionId: context?.sessionId, }); try { // Since headless browser automation typically requires puppeteer or playwright, // which are not standard Node.js libraries, we'll implement a simplified version // that calls curl for basic operations // For a production implementation, you'd want to use puppeteer or playwright // Simple navigation and HTML retrieval if (input.action === 'navigate' || input.action === 'getHTML') { // Build curl command let cmd = `curl -s "${input.url}"`; // Add user agent if specified if (input.userAgent) { cmd += ` -A "${input.userAgent}"`; } else { // Use a default modern user agent cmd += ` -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"`; } // Add timeout cmd += ` --max-time ${Math.floor((input.timeout || 30000) / 1000)}`; // Execute curl command const { stdout } = await execAsync(cmd); return { url: input.url, action: input.action, html: stdout, timestamp: new Date().toISOString(), }; } // For more complex actions, inform that full browser automation requires additional libraries return { url: input.url, action: input.action, error: "Full browser automation requires puppeteer or playwright libraries", suggestion: "For complex browser automation, install puppeteer or playwright", timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('Browser automation failed', { url: input.url, action: input.action, error: error.message }); throw new Error(`Failed to perform browser action ${input.action} on ${input.url}: ${error.message}`); } }, }; } /** * Captures screenshots */ function createScreenshotTool(logger: ILogger): MCPTool { return { name: 'web/screenshot', description: 'Capture screenshots of web pages', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL to capture', }, destination: { type: 'string', description: 'Path to save screenshot', }, format: { type: 'string', enum: ['png', 'jpeg'], description: 'Screenshot format', default: 'png', }, fullPage: { type: 'boolean', description: 'Capture full page height', default: false, }, selector: { type: 'string', description: 'CSS selector to capture specific element', }, viewportWidth: { type: 'number', description: 'Browser viewport width', default: 1280, }, viewportHeight: { type: 'number', description: 'Browser viewport height', default: 800, }, waitFor: { type: 'string', description: 'Wait for element matching this selector before screenshot', }, timeout: { type: 'number', description: 'Screenshot timeout in milliseconds', default: 30000, }, }, required: ['url'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Capturing screenshot', { url: input.url, destination: input.destination, sessionId: context?.sessionId, }); try { // Since screenshot capture requires puppeteer or similar, // and we're aiming for a minimal implementation without external dependencies, // we'll provide a simplified implementation // Determine destination path let destination = input.destination; // If no destination provided, create a default name if (!destination) { const urlObj = new URL(input.url); const host = urlObj.hostname.replace(/\./g, '-'); const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); destination = `${host}-${timestamp}.${input.format || 'png'}`; } // Resolve destination if working directory is provided const resolvedDest = context?.workingDirectory ? join(context.workingDirectory, destination) : destination; // Create parent directories if needed await fs.mkdir(join(resolvedDest, '..'), { recursive: true }); // For a simple implementation, we'll use a message about needing external dependencies // In a production setting, you'd use puppeteer or playwright for this return { url: input.url, destination, error: "Screenshot capture requires puppeteer or playwright libraries", suggestion: "For screenshot capture, install puppeteer or playwright", timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('Screenshot capture failed', { url: input.url, error: error.message }); throw new Error(`Failed to capture screenshot of ${input.url}: ${error.message}`); } }, }; } /** * Works with PDF documents */ function createPdfTool(logger: ILogger): MCPTool { return { name: 'web/pdf', description: 'Generate or manipulate PDF documents', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL to generate PDF from', }, html: { type: 'string', description: 'HTML content to generate PDF from', }, destination: { type: 'string', description: 'Path to save PDF', }, format: { type: 'string', enum: ['A4', 'A3', 'Letter', 'Legal', 'Tabloid'], description: 'PDF page format', default: 'A4', }, landscape: { type: 'boolean', description: 'Use landscape orientation', default: false, }, margin: { type: 'object', properties: { top: { type: 'string' }, right: { type: 'string' }, bottom: { type: 'string' }, left: { type: 'string' }, }, description: 'PDF margins', }, headerTemplate: { type: 'string', description: 'HTML for the header template', }, footerTemplate: { type: 'string', description: 'HTML for the footer template', }, timeout: { type: 'number', description: 'PDF generation timeout in milliseconds', default: 60000, }, }, required: [], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Working with PDF', { url: input.url, destination: input.destination, sessionId: context?.sessionId, }); try { // Determine source (URL or HTML) if (!input.url && !input.html) { throw new Error('Either url or html must be provided'); } // Determine destination path let destination = input.destination; // If no destination provided, create a default name if (!destination) { const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); destination = `document-${timestamp}.pdf`; } // Resolve destination if working directory is provided const resolvedDest = context?.workingDirectory ? join(context.workingDirectory, destination) : destination; // Create parent directories if needed await fs.mkdir(join(resolvedDest, '..'), { recursive: true }); // For a minimal implementation, we'll provide a message about needing external dependencies // In a production setting, you'd use puppeteer, wkhtmltopdf, or similar return { destination, error: "PDF generation requires puppeteer or other PDF libraries", suggestion: "For PDF generation, install puppeteer or wkhtmltopdf", timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('PDF operation failed', { error: error.message }); throw new Error(`Failed to perform PDF operation: ${error.message}`); } }, }; } /** * Handles web forms */ function createFormsTool(logger: ILogger): MCPTool { return { name: 'web/forms', description: 'Interact with web forms', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL of form page', }, formSelector: { type: 'string', description: 'CSS selector for the form', default: 'form', }, fields: { type: 'array', items: { type: 'object', properties: { name: { type: 'string' }, selector: { type: 'string' }, value: { type: 'string' }, type: { type: 'string', enum: ['text', 'textarea', 'select', 'checkbox', 'radio', 'file'], default: 'text', }, }, required: ['name', 'value'], }, description: 'Form fields to fill', }, submit: { type: 'boolean', description: 'Submit the form after filling', default: true, }, submitSelector: { type: 'string', description: 'CSS selector for submit button', }, waitForNavigation: { type: 'boolean', description: 'Wait for navigation after submit', default: true, }, timeout: { type: 'number', description: 'Form interaction timeout in milliseconds', default: 30000, }, }, required: ['url', 'fields'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Form interaction', { url: input.url, formSelector: input.formSelector || 'form', sessionId: context?.sessionId, }); try { // For a minimal implementation, we'll provide a message about needing external dependencies // In a production setting, you'd use puppeteer or playwright for this return { url: input.url, formSelector: input.formSelector || 'form', fields: input.fields, error: "Form interaction requires puppeteer or playwright libraries", suggestion: "For form interaction, install puppeteer or playwright", timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('Form interaction failed', { url: input.url, error: error.message }); throw new Error(`Failed to interact with form on ${input.url}: ${error.message}`); } }, }; } /** * Manages browser cookies */ function createCookiesTool(logger: ILogger): MCPTool { return { name: 'web/cookies', description: 'Manage browser cookies', inputSchema: { type: 'object', properties: { action: { type: 'string', enum: ['get', 'set', 'delete', 'clear'], description: 'Cookie action', default: 'get', }, url: { type: 'string', description: 'URL for cookie context', }, domain: { type: 'string', description: 'Cookie domain', }, name: { type: 'string', description: 'Cookie name', }, value: { type: 'string', description: 'Cookie value (for set action)', }, path: { type: 'string', description: 'Cookie path', default: '/', }, expires: { type: 'string', description: 'Cookie expiration date', }, httpOnly: { type: 'boolean', description: 'HTTP only flag', default: false, }, secure: { type: 'boolean', description: 'Secure flag', default: false, }, sameSite: { type: 'string', enum: ['Strict', 'Lax', 'None'], description: 'SameSite policy', }, }, required: ['action', 'url'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Cookie management', { action: input.action || 'get', url: input.url, name: input.name, sessionId: context?.sessionId, }); try { // For a minimal implementation, we'll provide a message about needing external dependencies // In a production setting, you'd use puppeteer, playwright, or a specialized cookie library return { action: input.action || 'get', url: input.url, error: "Cookie management requires puppeteer, playwright, or specialized cookie libraries", suggestion: "For cookie management, install puppeteer or playwright", timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('Cookie management failed', { action: input.action, url: input.url, error: error.message }); throw new Error(`Failed to manage cookies for ${input.url}: ${error.message}`); } }, }; } /** * Manages web sessions */ function createSessionTool(logger: ILogger): MCPTool { return { name: 'web/session', description: 'Manage persistent web sessions', inputSchema: { type: 'object', properties: { action: { type: 'string', enum: ['create', 'restore', 'save', 'delete'], description: 'Session action', default: 'create', }, name: { type: 'string', description: 'Session name', }, url: { type: 'string', description: 'Initial URL for session', }, savePath: { type: 'string', description: 'Path to save session data', }, loadPath: { type: 'string', description: 'Path to load session data from', }, cookies: { type: 'array', items: { type: 'object', }, description: 'Cookies to set in session', }, localStorage: { type: 'object', description: 'localStorage items to set', }, timeout: { type: 'number', description: 'Session operation timeout in milliseconds', default: 30000, }, }, required: ['action'], }, handler: async (input: any, context?: WebToolContext) => { logger.info('Session management', { action: input.action || 'create', name: input.name, url: input.url, sessionId: context?.sessionId, }); try { // For a minimal implementation, we'll provide a message about needing external dependencies // In a production setting, you'd use puppeteer or playwright with session storage return { action: input.action || 'create', name: input.name || 'default', error: "Session management requires puppeteer or playwright libraries", suggestion: "For session management, install puppeteer or playwright", timestamp: new Date().toISOString(), }; } catch (error: any) { logger.error('Session management failed', { action: input.action, name: input.name, error: error.message }); throw new Error(`Failed to manage session ${input.name}: ${error.message}`); } }, }; }