# Configuration file for hazo_llm_api package
# All configurable values should be stored here
#
# Conventions used in this file:
#   - Lines starting with "#" are descriptive comments.
#   - Lines starting with ";" are commented-out (inactive) settings or legacy sections.
#   - Every comment is on its own line; no trailing comments after values.

[logging]
# Log file path for winston logging
logfile=logs/hazo_llm_api.log
# Minimum log level: debug, info, warn, error
level=info
# Max file size before rotation (e.g., '10m', '100k')
max_size=10m
# Max number of rotated files to keep
max_files=5
# Also log to console (true/false)
console_enabled=true

[package]
# Package build configuration
build_dir=dist
source_dir=src

[test_app]
# Test application configuration
port=3000
host=localhost

[ui]
# UI component defaults
default_theme=light
sidebar_width=256

[llm]
# LLM API global configuration
# Comma-separated list of enabled LLM providers (e.g., gemini,openai)
# Or JSON array format: ["gemini", "openai"]
enabled_llms=["gemini", "qwen"]
# Primary/default LLM to use when not specified in function calls
primary_llm=gemini
# SQLite database path relative to app root
sqlite_path=prompt_library.sqlite

# =============================================================================
# DEPRECATED: Legacy sections below kept for backward compatibility
# Migrate to new [llm_gemini] section below
# =============================================================================
; [gemini]
; # Legacy: Gemini API configuration - MIGRATED to [llm_gemini] section below
; api_url=https://generativelanguage.googleapis.com/v1/models/gemini-2.5-flash:generateContent
; api_url_image=https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image:generateContent

; [gemini_text_api]
; # Legacy: Gemini Text API generation configuration - MIGRATED to [llm_gemini] section below

; [gemini_image_api]
; # Legacy: Gemini Image API generation configuration - MIGRATED to [llm_gemini] section below

[llm_gemini]
# Gemini Provider Configuration
# Note: API key must be in .env.local as GEMINI_API_KEY (not in config for security)

# Base API URL for text generation
api_url=https://generativelanguage.googleapis.com/v1/models/gemini-2.5-flash:generateContent
# API URL for image generation (optional, uses api_url if not set)
api_url_image=https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image:generateContent
; api_url_image=https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent

# Per-service model configuration (optional, uses model in URL if not specified)
# Model for text_text service
; model_text_text=gemini-2.5-flash
# Model for image_text service
; model_image_text=gemini-2.5-flash
# Model for text_image service
; model_text_image=gemini-2.5-flash-image
# Model for image_image service
; model_image_image=gemini-2.5-flash-image
# Model for document_text service (PDF analysis)
; model_document_text=gemini-2.5-flash

# Capabilities this provider supports (JSON array)
# Options: text_text, image_text, text_image, image_image, document_text
capabilities=["text_text", "image_text", "text_image", "image_image", "document_text"]

# Generation config for text API calls
# These parameters are optional - only include them in API calls if uncommented
#
# temperature - Controls randomness in output (0.0-2.0)
# Lower values = more deterministic, higher values = more creative
; text_temperature=0.7
#
# maxOutputTokens - Maximum number of tokens in the response
# Limits response length to control costs and output size
; text_maxOutputTokens=1024
#
# topP - Nucleus sampling probability (0.0-1.0)
# Considers tokens with cumulative probability up to topP
; text_topP=0.95
#
# topK - Top-k sampling
# Considers only the top K most probable tokens
; text_topK=40
#
# candidateCount - Number of response candidates to generate
# Returns multiple alternative responses (1-8)
; text_candidateCount=1
#
# stopSequences - JSON array of sequences that stop generation
# Generation stops when any of these sequences is encountered
; text_stopSequences=["###END"]
#
# responseMimeType - Format of the response
# Options: text/plain, application/json
; text_responseMimeType=text/plain

# Generation config for image API calls (text_image, image_image, image_text)
# These parameters are optional - only include them in API calls if uncommented
#
# temperature - Controls randomness in output (0.0-2.0)
# For image analysis, lower values (0.3-0.5) typically provide more accurate descriptions
# (active: this is the only generation parameter currently set for Gemini)
image_temperature=0.1
#
# maxOutputTokens - Maximum number of tokens in the response
# For image descriptions, 300-500 is usually sufficient
; image_maxOutputTokens=300
#
# topP - Nucleus sampling probability (0.0-1.0)
# Considers tokens with cumulative probability up to topP
; image_topP=0.8
#
# topK - Top-k sampling
# Considers only the top K most probable tokens
; image_topK=20
#
# candidateCount - Number of response candidates to generate
# Returns multiple alternative responses (1-8)
; image_candidateCount=1
#
# stopSequences - JSON array of sequences that stop generation
# Generation stops when any of these sequences is encountered
; image_stopSequences=["###END"]
#
# responseMimeType - Format of the response
# Options: text/plain, application/json
; image_responseMimeType=text/plain

[llm_qwen]
# Qwen Provider Configuration
# Note: API key must be in .env.local as QWEN_API_KEY (not in config for security)

# Base API URL (default: DashScope endpoint)
# Used as fallback if service-specific URLs are not configured
api_url=https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions

# Per-service model configuration (required for each service you want to use)
# Model for text_text service (e.g., qwen-max, qwen-plus)
model_text_text=qwen-max
# Model for image_text service (e.g., qwen-vl-max, qwen-vl-plus)
model_image_text=qwen-vl-max
# Model for text_image service (image generation)
# Commented-out line below is a previous/alternative value kept for reference
model_text_image=qwen-image
; model_text_image=qwen-vl-max
# Model for image_image service (image editing)
# Commented-out lines below are previous/alternative values kept for reference
model_image_image=qwen-image-edit-plus
; model_image_image=qwen-image-edit
; model_image_image=qwen-vl-max

# Per-service API URL configuration (optional, uses api_url if not specified)
# API URL for text_text service
; api_url_text_text=https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions
# API URL for image_text service
; api_url_image_text=https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions
# API URL for text_image service (image generation)
# NOTE(review): this URL is commented out while model_text_image is set, so by the
# fallback rule above text_image requests use api_url (chat completions) - confirm intended
; api_url_text_image=https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text2image/image-synthesis
# API URL for image_image service (image editing)
# Try multimodal generation endpoint for qwen-image-edit
api_url_image_image=https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation

# Capabilities this provider supports (JSON array)
# Options: text_text, image_text, text_image, image_image
# Note: text_image and image_image are enabled here and rely on the image models and
# endpoints configured above; image generation/transformation may not be available
# for every Qwen model or account - verify before relying on them
capabilities=["text_text", "image_text", "text_image", "image_image"]

# Default system instruction (optional)
; system_instruction=You are a helpful and friendly assistant.

# Generation config for text API calls
# These parameters are optional - only include them in API calls if uncommented
#
# temperature - Controls randomness in output (0.0-2.0)
# Lower values = more deterministic, higher values = more creative
; text_temperature=0.8
#
# max_tokens - Maximum number of tokens in the response
# Limits response length to control costs and output size
; text_max_tokens=1024
#
# top_p - Nucleus sampling probability (0.0-1.0)
# Considers tokens with cumulative probability up to top_p
; text_top_p=0.95
#
# top_k - Top-k sampling
# Considers only the top K most probable tokens
; text_top_k=40
#
# stop - JSON array of sequences that stop generation
# Generation stops when any of these sequences is encountered
; text_stop=["###END"]

# Generation config for image API calls (image_text, text_image, image_image)
# These parameters are optional - only include them in API calls if uncommented
#
# temperature - Controls randomness in output (0.0-2.0)
# For image analysis, lower values (0.3-0.5) typically provide more accurate descriptions
; image_temperature=0.4
#
# max_tokens - Maximum number of tokens in the response
# For image descriptions, 300-500 is usually sufficient
; image_max_tokens=500
#
# top_p - Nucleus sampling probability (0.0-1.0)
# Considers tokens with cumulative probability up to top_p
; image_top_p=0.8
#
# top_k - Top-k sampling
# Considers only the top K most probable tokens
; image_top_k=20
#
# stop - JSON array of sequences that stop generation
# Generation stops when any of these sequences is encountered
; image_stop=["###END"]

[llm_anthropic]
; Anthropic API configuration
; Set ANTHROPIC_API_KEY in .env.local
; api_key_env = ANTHROPIC_API_KEY
api_url = https://api.anthropic.com/v1/messages
api_version = 2023-06-01
model_text_text = claude-sonnet-4-6
model_image_text = claude-sonnet-4-6
model_document_text = claude-sonnet-4-6
text_max_tokens = 8192
; NOTE(review): capabilities is commented out and written comma-separated, while
; [llm_gemini] and [llm_qwen] use JSON arrays - confirm the accepted format before enabling
; capabilities = text_text, image_text, document_text, text_text_stream

[llm_openai]
; OpenAI API configuration
; Set OPENAI_API_KEY in .env.local
; api_key_env = OPENAI_API_KEY
; Chat completions endpoint
api_url = https://api.openai.com/v1/chat/completions
; Image generation endpoint
api_url_image = https://api.openai.com/v1/images/generations
; Embeddings endpoint
api_url_embed = https://api.openai.com/v1/embeddings
model_text_text = gpt-4o
model_image_text = gpt-4o
model_text_image = gpt-image-1
model_embed = text-embedding-3-small
; NOTE(review): capabilities is commented out and written comma-separated, while
; [llm_gemini] and [llm_qwen] use JSON arrays - confirm the accepted format before enabling
; capabilities = text_text, image_text, text_image, text_text_stream, embed

[llm_deepseek]
; DeepSeek API configuration
; Set DEEPSEEK_API_KEY in .env.local
; api_key_env = DEEPSEEK_API_KEY
api_url = https://api.deepseek.com/v1/chat/completions
model_text_text = deepseek-chat
; NOTE(review): same format question as above (comma-separated vs JSON array)
; capabilities = text_text, text_text_stream

[database]
# Database configuration
# Enable WAL mode for better SQLite performance
wal_mode=true
# Table name for prompts
prompts_table=prompts_library

[log.overrides]
; Override log levels per namespace. Example:
; hazo_llm_api = debug
; hazo_llm_api.providers.gemini = trace
; NOTE(review): [logging] documents levels debug, info, warn, error - confirm that
; "trace" is accepted here before copying the example above