import { tool } from '@strands-agents/sdk'
import { z } from 'zod'

/** One image entry in an OpenAI Images API response, limited to the fields read here. */
interface OpenAiImage {
  b64_json?: string
  url?: string
  revised_prompt?: string
}

/** Subset of the OpenAI Images API response body that this module reads. */
interface OpenAiImageResponse {
  data?: OpenAiImage[]
  output_format?: string
  usage?: unknown
  error?: { message?: string; code?: string | null; type?: string }
}

/** Outcome for one generated image: stored base64 (id + dataUrl) or a remote url fallback. */
interface ImageResult {
  id?: string
  dataUrl?: string
  url?: string
  revised_prompt?: string
}

/**
 * Read the OpenAI API key from the `careless-v2-settings` entry in localStorage.
 *
 * Prefers the dedicated `openaiApiKey`; falls back to the main `apiKey` only
 * when the configured provider is `openai`.
 *
 * @returns The key, or `null` when none is configured or the settings cannot be read/parsed.
 */
function getKey(): string | null {
  try {
    const raw = localStorage.getItem('careless-v2-settings')
    const parsed: unknown = raw ? JSON.parse(raw) : {}
    if (typeof parsed !== 'object' || parsed === null) return null
    const settings = parsed as Record<string, unknown>
    const asKey = (value: unknown): string | null =>
      typeof value === 'string' && value !== '' ? value : null
    return asKey(settings.openaiApiKey) ?? (settings.provider === 'openai' ? asKey(settings.apiKey) : null)
  } catch {
    return null
  }
}

/** Extract a printable message from an arbitrary thrown value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err)
}

/**
 * Parse a response body as JSON without throwing.
 *
 * Returns an empty object when the body is not valid JSON (or not an object)
 * so callers can still report the HTTP status instead of a parse error.
 */
async function readJsonBody(res: Response): Promise<OpenAiImageResponse> {
  try {
    const parsed: unknown = await res.json()
    return typeof parsed === 'object' && parsed !== null ? (parsed as OpenAiImageResponse) : {}
  } catch {
    return {}
  }
}

/**
 * Turn free text into markdown image alt text: square brackets are removed,
 * whitespace runs (including newlines, which could otherwise terminate the
 * image syntax) are collapsed to single spaces, and the result is capped at
 * 120 characters.
 */
function toAltText(text: string): string {
  return text
    .replace(/[\[\]]/g, '')
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 120)
}

/**
 * Persist a base64 image to IndexedDB so it survives reloads and can be referenced
 * from conversation history.
 *
 * Persistence is best-effort: on failure a warning is logged and the id is still
 * returned (the image is then session-only).
 *
 * @param b64 Base64-encoded image bytes.
 * @param mime MIME type stored alongside the image.
 * @param prompt Prompt stored alongside the image.
 * @returns The generated image id.
 */
async function storeImage(b64: string, mime: string, prompt: string): Promise<string> {
  const id = `img_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`
  let db: IDBDatabase | undefined
  try {
    db = await openImageDb()
    const tx = db.transaction('images', 'readwrite')
    tx.objectStore('images').put({ id, b64, mime, prompt, createdAt: Date.now() })
    await new Promise<void>((res, rej) => {
      tx.oncomplete = () => res()
      tx.onerror = () => rej(tx.error)
      // A transaction can abort (e.g. quota exceeded) without a preceding error
      // event; without this handler the await above would never settle.
      tx.onabort = () => rej(tx.error)
    })
  } catch (e) {
    console.warn('[openai-image] IDB store failed, image is session-only', e)
  } finally {
    // One connection is opened per call, so release it when done.
    db?.close()
  }
  return id
}

/**
 * Open (creating on first use) the `careless-images` database, version 1,
 * with an `images` object store keyed by `id`.
 */
function openImageDb(): Promise<IDBDatabase> {
  return new Promise<IDBDatabase>((resolve, reject) => {
    const req = indexedDB.open('careless-images', 1)
    req.onupgradeneeded = () => {
      const db = req.result
      if (!db.objectStoreNames.contains('images')) db.createObjectStore('images', { keyPath: 'id' })
    }
    req.onsuccess = () => resolve(req.result)
    req.onerror = () => reject(req.error)
  })
}

const GPT_IMAGE_MODELS = ['gpt-image-1.5', 'gpt-image-1', 'gpt-image-1-mini']
const GPT_IMAGE_SIZES = ['auto', '1024x1024', '1536x1024', '1024x1536'] as const
// Not referenced in this file at present; mirrors the sizes listed in the generate_image `size` description.
const DALL_E_3_SIZES = ['1024x1024', '1792x1024', '1024x1792'] as const

/**
 * `generate_image` tool: calls the OpenAI image generation endpoint, stores
 * returned base64 images in IndexedDB, and returns a JSON string with
 * per-image metadata plus markdown that embeds each image.
 */
export const generateImageTool = tool({
  name: 'generate_image',
  description:
    'Generate an image from a text prompt via OpenAI. Supports gpt-image-1.5 (default, best), gpt-image-1, gpt-image-1-mini, dall-e-3, dall-e-2. Returns a data-URL embedded in markdown so it renders inline.',
  inputSchema: z.object({
    prompt: z.string().min(1).max(32000).describe('Text description of the image'),
    model: z
      .enum(['gpt-image-1.5', 'gpt-image-1', 'gpt-image-1-mini', 'dall-e-3', 'dall-e-2'])
      .optional()
      .describe('Model. Default: gpt-image-1.5 (recommended).'),
    size: z
      .string()
      .optional()
      .describe('Size. gpt-image: auto|1024x1024|1536x1024|1024x1536. dall-e-3: 1024x1024|1792x1024|1024x1792. Default: 1024x1024.'),
    quality: z
      .enum(['auto', 'low', 'medium', 'high', 'standard', 'hd'])
      .optional()
      .describe('Quality. gpt-image: auto|low|medium|high. dall-e-3: standard|hd. Default: auto.'),
    format: z.enum(['png', 'jpeg', 'webp']).optional().describe('Output format (gpt-image only). Default: png.'),
    background: z
      .enum(['transparent', 'opaque', 'auto'])
      .optional()
      .describe('Background (gpt-image only). Default: auto. Transparent requires png/webp.'),
    compression: z.number().min(0).max(100).optional().describe('Compression 0-100 for jpeg/webp (gpt-image only).'),
    n: z.number().min(1).max(4).optional().describe('Number of images (1-4). Default: 1.'),
  }),
  callback: async (input) => {
    try {
      const key = getKey()
      if (!key) {
        return JSON.stringify({
          status: 'error',
          error: 'OpenAI API key required. Add it in Settings → OpenAI.',
        })
      }

      const model = input.model || 'gpt-image-1.5'
      const isGptImage = GPT_IMAGE_MODELS.includes(model)
      const isDallE3 = model === 'dall-e-3'

      // Build body based on model family
      const body: Record<string, unknown> = {
        model,
        prompt: input.prompt,
        n: input.n ?? 1,
        size: input.size || '1024x1024',
      }

      // Quality handling — map generic values to model-specific ones
      if (input.quality) {
        if (isGptImage) {
          body.quality = ['auto', 'low', 'medium', 'high'].includes(input.quality) ? input.quality : 'auto'
        } else if (isDallE3) {
          body.quality = ['standard', 'hd'].includes(input.quality) ? input.quality : 'standard'
        } else {
          // dall-e-2 only accepts 'standard'
          body.quality = 'standard'
        }
      }

      // GPT-image-only params
      if (isGptImage) {
        if (input.format) body.output_format = input.format
        if (input.background) body.background = input.background
        if (input.compression !== undefined && (input.format === 'jpeg' || input.format === 'webp')) {
          body.output_compression = input.compression
        }
      }

      // DALL-E 2/3 need explicit b64_json response_format (GPT-image always returns b64)
      if (!isGptImage) {
        body.response_format = 'b64_json'
      }

      const res = await fetch('https://api.openai.com/v1/images/generations', {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${key}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(body),
      })
      const data = await readJsonBody(res)

      if (!res.ok) {
        return JSON.stringify({
          status: 'error',
          error: data.error?.message || `HTTP ${res.status}`,
          code: data.error?.code,
          type: data.error?.type,
        })
      }

      const images = data.data ?? []
      if (!images.length) {
        return JSON.stringify({ status: 'error', error: 'No images returned' })
      }

      // `format` is only forwarded to GPT-image models, so it must not influence
      // the MIME type of DALL-E output (the request never asked for that format).
      const requestedFormat = isGptImage ? input.format : undefined
      const mime = `image/${data.output_format || requestedFormat || 'png'}`

      const results: ImageResult[] = await Promise.all(
        images.map(async (img): Promise<ImageResult> => {
          const b64 = img.b64_json
          if (!b64) {
            // Shouldn't happen for our configured request but handle gracefully
            return { url: img.url, revised_prompt: img.revised_prompt }
          }
          const id = await storeImage(b64, mime, input.prompt)
          return { id, dataUrl: `data:${mime};base64,${b64}`, revised_prompt: img.revised_prompt }
        })
      )

      // Build markdown so the agent's response renders images inline.
      // The MarkdownRenderer's handler will wire click-to-fullscreen.
      // Entries with neither a data URL nor a remote URL are skipped rather
      // than rendered as a broken `![...](undefined)` image.
      const markdown = results
        .flatMap((r) => {
          const src = r.dataUrl || r.url
          return src ? [`![${toAltText(r.revised_prompt || input.prompt)}](${src})`] : []
        })
        .join('\n\n')

      return JSON.stringify({
        status: 'success',
        model,
        size: body.size,
        count: results.length,
        usage: data.usage,
        images: results.map((r) => ({
          id: r.id,
          revised_prompt: r.revised_prompt,
          // Do NOT echo dataUrl back to agent context — it would blow up tokens.
          // NOTE(review): the `markdown` field below still carries the full data
          // URL inside this same tool result — confirm that is intended.
          has_data_url: !!r.dataUrl,
          url: r.url,
        })),
        markdown,
        hint: 'Include the `markdown` field in your reply so the user sees the image.',
      })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', error: errorMessage(err) })
    }
  },
})

/**
 * `edit_image` tool: sends 1-16 input images (and an optional mask) with a
 * prompt to the OpenAI image edit endpoint, stores returned base64 images in
 * IndexedDB, and returns a JSON string with image ids plus embedding markdown.
 */
export const editImageTool = tool({
  name: 'edit_image',
  description:
    'Edit or combine existing images via OpenAI gpt-image-1.5 / gpt-image-1. Pass 1-16 images as data URLs or https URLs plus a prompt describing the edit.',
  inputSchema: z.object({
    prompt: z.string().min(1).describe('Description of the desired edit'),
    images: z
      .array(z.string())
      .min(1)
      .max(16)
      .describe('Array of data URLs (data:image/...) or https URLs of input images'),
    model: z
      .enum(['gpt-image-1.5', 'gpt-image-1', 'gpt-image-1-mini'])
      .optional()
      .describe('Model. Default: gpt-image-1.5.'),
    size: z.enum(GPT_IMAGE_SIZES).optional().describe('Output size. Default: auto.'),
    quality: z.enum(['auto', 'low', 'medium', 'high']).optional().describe('Default: auto.'),
    format: z.enum(['png', 'jpeg', 'webp']).optional(),
    background: z.enum(['transparent', 'opaque', 'auto']).optional(),
    mask: z
      .string()
      .optional()
      .describe('Optional mask as data URL or https URL (alpha channel indicates edit region)'),
    n: z.number().min(1).max(4).optional(),
  }),
  callback: async (input) => {
    try {
      const key = getKey()
      if (!key) return JSON.stringify({ status: 'error', error: 'OpenAI API key required' })

      const model = input.model || 'gpt-image-1.5'

      // /v1/images/edits with JSON body + images[] array is supported on newer GPT-image models
      const body: Record<string, unknown> = {
        model,
        prompt: input.prompt,
        images: input.images.map((url) => ({ image_url: url })),
        n: input.n ?? 1,
      }
      if (input.mask) body.mask = { image_url: input.mask }
      if (input.size) body.size = input.size
      if (input.quality) body.quality = input.quality
      if (input.format) body.output_format = input.format
      if (input.background) body.background = input.background

      const res = await fetch('https://api.openai.com/v1/images/edits', {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${key}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(body),
      })
      const data = await readJsonBody(res)

      if (!res.ok) {
        return JSON.stringify({
          status: 'error',
          error: data.error?.message || `HTTP ${res.status}`,
          code: data.error?.code,
        })
      }

      const mime = `image/${data.output_format || input.format || 'png'}`
      const stored: Array<{ id: string; dataUrl: string }> = []
      await Promise.all(
        (data.data ?? []).map(async (img, index) => {
          if (!img.b64_json) return
          const id = await storeImage(img.b64_json, mime, `edit: ${input.prompt}`)
          // Write by index so output order matches the response order.
          stored[index] = { id, dataUrl: `data:${mime};base64,${img.b64_json}` }
        })
      )
      const edited = stored.filter((entry) => entry !== undefined)

      // Matches generate_image: an empty result is reported as an error
      // instead of a "success" with count 0 and empty markdown.
      if (!edited.length) {
        return JSON.stringify({ status: 'error', error: 'No images returned' })
      }

      const alt = toAltText(input.prompt)
      const markdown = edited.map((r) => `![${alt}](${r.dataUrl})`).join('\n\n')

      return JSON.stringify({
        status: 'success',
        model,
        count: edited.length,
        usage: data.usage,
        images: edited.map((r) => ({ id: r.id })),
        markdown,
        hint: 'Include the `markdown` in your reply to render the edited image.',
      })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', error: errorMessage(err) })
    }
  },
})

export const OPENAI_IMAGE_TOOLS = [generateImageTool, editImageTool]