import axios from 'axios';
import { z } from 'zod';
import { DEFAULT_VISION_MODEL, SUPPORTED_VISION_MODELS } from '../config.js';
import type { ImageAnalysisParams, AnalysisResult, GLMRequest, GLMResponse } from '../types/index.js';

/**
 * Zod schema used to validate and default the arguments of
 * {@link ImageUnderstandingTool.analyzeImage}.
 *
 * - `imageUrl` must parse as a URL.
 * - `prompt`, `model`, `temperature` and `maxTokens` are optional and fall
 *   back to the defaults declared below.
 */
const ImageAnalysisParamsSchema = z.object({
  imageUrl: z.string().url('必须提供有效的图片URL'),
  prompt: z.string().optional().default('请详细描述这张图片的内容'),
  model: z.enum(SUPPORTED_VISION_MODELS as [string, ...string[]]).optional().default(DEFAULT_VISION_MODEL),
  temperature: z.number().min(0).max(1).optional().default(0.7),
  maxTokens: z.number().min(1).max(4096).optional().default(1024)
});

/**
 * Thin client that sends an image URL plus a text prompt to the chat
 * completions endpoint and returns the model's textual answer.
 *
 * `analyzeImage` never rejects: every failure (validation, HTTP, malformed
 * response) is reported through an `AnalysisResult` with `success: false`.
 */
export class ImageUnderstandingTool {
  private apiKey: string;
  private baseUrl: string = 'https://open.bigmodel.cn/api/paas/v4/chat/completions';

  /**
   * @param apiKey Bearer token sent in the `Authorization` header.
   * @throws Error when `apiKey` is empty.
   */
  constructor(apiKey: string) {
    if (!apiKey) {
      throw new Error('API密钥不能为空');
    }
    this.apiKey = apiKey;
  }

  /**
   * Analyzes an image.
   *
   * @param params Analysis parameters; validated against
   *   `ImageAnalysisParamsSchema`, which also fills in defaults.
   * @returns On success, the model's answer, the model name echoed by the API
   *   and, when the API reported it, the token usage. On failure, a result
   *   with `success: false`, an empty `analysis`, `DEFAULT_VISION_MODEL` as
   *   `model` and a human-readable `error`.
   */
  async analyzeImage(params: ImageAnalysisParams): Promise<AnalysisResult> {
    try {
      // Validate the input and apply defaults.
      const validatedParams = ImageAnalysisParamsSchema.parse(params);

      // Build the request body: one user message carrying text + image.
      const requestBody: GLMRequest = {
        model: validatedParams.model,
        messages: [{
          role: 'user',
          content: [
            { type: 'text', text: validatedParams.prompt },
            { type: 'image_url', image_url: { url: validatedParams.imageUrl } }
          ]
        }],
        temperature: validatedParams.temperature,
        max_tokens: validatedParams.maxTokens,
        stream: false
      };

      // Send the request.
      const response = await axios.post<GLMResponse>(this.baseUrl, requestBody, {
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json'
        },
        timeout: 30000 // 30 second timeout
      });

      // Check the response shape before reading from it.
      const data = response.data;
      if (!data || !data.choices || data.choices.length === 0) {
        throw new Error('API返回的响应格式不正确');
      }

      const content = data.choices[0]?.message?.content;
      if (!content) {
        throw new Error('API返回的内容为空');
      }

      // `usage` is read defensively: a completion that lacks usage statistics
      // is still a valid answer and must not be turned into a failure.
      const usage = data.usage;
      const tokenUsage = usage
        ? {
            promptTokens: usage.prompt_tokens,
            completionTokens: usage.completion_tokens,
            totalTokens: usage.total_tokens
          }
        : undefined;

      return {
        success: true,
        analysis: content,
        model: data.model,
        ...(tokenUsage ? { tokenUsage } : {})
      };
    } catch (error) {
      // Map each error category to a failure result.
      if (error instanceof z.ZodError) {
        // `issues` is the canonical list of validation problems.
        const details = error.issues.map(issue => issue.message).join(', ');
        return this.failure(`参数验证失败: ${details}`);
      }

      if (axios.isAxiosError(error)) {
        // Prefer the message supplied in the API's error payload, if any.
        const detail = error.response?.data?.error?.message || error.message;
        return this.failure(this.describeHttpFailure(error.response?.status, detail));
      }

      return this.failure(error instanceof Error ? error.message : '未知错误');
    }
  }

  /**
   * Checks whether the API key is usable by issuing a real analysis request
   * for an embedded 1x1 PNG data URL.
   *
   * @returns `true` when that request succeeds; `false` for any failure,
   *   which includes non-authentication failures such as timeouts or rate
   *   limiting.
   */
  async validateApiKey(): Promise<boolean> {
    try {
      // A tiny inline test image keeps the probe request self-contained.
      const testImageUrl = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==';

      const result = await this.analyzeImage({
        imageUrl: testImageUrl,
        prompt: '这是什么?',
        model: DEFAULT_VISION_MODEL
      });

      return result.success;
    } catch (error) {
      return false;
    }
  }

  /**
   * Returns the list of supported models.
   *
   * @returns A fresh copy of `SUPPORTED_VISION_MODELS`, so callers cannot
   *   mutate the shared configuration array.
   */
  getSupportedModels(): string[] {
    return [...SUPPORTED_VISION_MODELS];
  }

  /**
   * Returns a description for a model.
   *
   * @param model Model name.
   * @returns The description, or `'未知模型'` when the model has none.
   */
  getModelDescription(model: string): string {
    // A Map avoids accidental hits on Object.prototype members
    // (e.g. "constructor", "toString") that a plain object lookup would return.
    const descriptions = new Map<string, string>([
      [DEFAULT_VISION_MODEL, '图像理解能力强,支持多图解析与推理'],
    ]);

    return descriptions.get(model) || '未知模型';
  }

  /** Builds the failure-shaped result shared by every error path. */
  private failure(message: string): AnalysisResult {
    return {
      success: false,
      analysis: '',
      model: DEFAULT_VISION_MODEL,
      error: message
    };
  }

  /** Translates an HTTP status (possibly absent) into the user-facing error text. */
  private describeHttpFailure(status: number | undefined, detail: string): string {
    switch (status) {
      case 401:
        return 'API密钥无效或已过期';
      case 403:
        return '没有权限访问该API';
      case 429:
        return 'API调用频率超限,请稍后再试';
      case 500:
        return 'API服务器内部错误';
      default:
        return `API请求失败: ${detail}`;
    }
  }
}