/** Input parameters for inpainting a time span of an existing audio file. */ export type AceStepAudioInpaintInput = { /** * URL of the audio file to be inpainted. */ audio_url: string | Blob | File; /** * End time in seconds for the inpainting process. Default value: `30` */ end_time?: number; /** * Whether the end time is relative to the start or end of the audio. Default value: `"start"` */ end_time_relative_to?: "start" | "end"; /** * Granularity scale for the generation process. Higher values can reduce artifacts. Default value: `10` */ granularity_scale?: number; /** * Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps) Default value: `0.5` */ guidance_interval?: number; /** * Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to minimum_guidance_scale in the interval. 0.0 means no decay. */ guidance_interval_decay?: number; /** * Guidance scale for the generation. Default value: `15` */ guidance_scale?: number; /** * Type of CFG to use for the generation process. Default value: `"apg"` */ guidance_type?: "cfg" | "apg" | "cfg_star"; /** * Lyric guidance scale for the generation. Default value: `1.5` */ lyric_guidance_scale?: number; /** * Lyrics to be sung in the audio. If not provided or if [inst] or [instrumental] is the content of this field, no lyrics will be sung. Use control structures like [verse], [chorus] and [bridge] to control the structure of the song. Default value: `""` */ lyrics?: string; /** * Minimum guidance scale for the generation after the decay. Default value: `3` */ minimum_guidance_scale?: number; /** * Number of steps to generate the audio. Default value: `27` */ number_of_steps?: number; /** * Scheduler to use for the generation process. Default value: `"euler"` */ scheduler?: "euler" | "heun"; /** * Random seed for reproducibility. If not provided, a random seed will be used. */ seed?: number; /** * start time in seconds for the inpainting process. 
*/ start_time?: number; /** * Whether the start time is relative to the start or end of the audio. Default value: `"start"` */ start_time_relative_to?: "start" | "end"; /** * Tag guidance scale for the generation. Default value: `5` */ tag_guidance_scale?: number; /** * Comma-separated list of genre tags to control the style of the generated audio. */ tags: string; /** * Variance for the inpainting process. Higher values can lead to more diverse results. Default value: `0.5` */ variance?: number; }; /** Input parameters for outpainting an audio file, i.e. extending it before its start and/or after its end. */ export type AceStepAudioOutpaintInput = { /** * URL of the audio file to be outpainted. */ audio_url: string | Blob | File; /** * Duration in seconds to extend the audio from the end. Default value: `30` */ extend_after_duration?: number; /** * Duration in seconds to extend the audio from the start. */ extend_before_duration?: number; /** * Granularity scale for the generation process. Higher values can reduce artifacts. Default value: `10` */ granularity_scale?: number; /** * Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps) Default value: `0.5` */ guidance_interval?: number; /** * Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to minimum_guidance_scale in the interval. 0.0 means no decay. */ guidance_interval_decay?: number; /** * Guidance scale for the generation. Default value: `15` */ guidance_scale?: number; /** * Type of CFG to use for the generation process. Default value: `"apg"` */ guidance_type?: "cfg" | "apg" | "cfg_star"; /** * Lyric guidance scale for the generation. Default value: `1.5` */ lyric_guidance_scale?: number; /** * Lyrics to be sung in the audio. If not provided or if [inst] or [instrumental] is the content of this field, no lyrics will be sung. Use control structures like [verse], [chorus] and [bridge] to control the structure of the song. 
Default value: `""` */ lyrics?: string; /** * Minimum guidance scale for the generation after the decay. Default value: `3` */ minimum_guidance_scale?: number; /** * Number of steps to generate the audio. Default value: `27` */ number_of_steps?: number; /** * Scheduler to use for the generation process. Default value: `"euler"` */ scheduler?: "euler" | "heun"; /** * Random seed for reproducibility. If not provided, a random seed will be used. */ seed?: number; /** * Tag guidance scale for the generation. Default value: `5` */ tag_guidance_scale?: number; /** * Comma-separated list of genre tags to control the style of the generated audio. */ tags: string; }; /** Input parameters for editing the lyrics of, or remixing, an existing audio file. */ export type AceStepAudioToAudioInput = { /** * URL of the audio file to be edited or remixed (see `edit_mode`). */ audio_url: string | Blob | File; /** * Whether to edit the lyrics only or remix the audio. Default value: `"remix"` */ edit_mode?: "lyrics" | "remix"; /** * Granularity scale for the generation process. Higher values can reduce artifacts. Default value: `10` */ granularity_scale?: number; /** * Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps) Default value: `0.5` */ guidance_interval?: number; /** * Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to minimum_guidance_scale in the interval. 0.0 means no decay. */ guidance_interval_decay?: number; /** * Guidance scale for the generation. Default value: `15` */ guidance_scale?: number; /** * Type of CFG to use for the generation process. Default value: `"apg"` */ guidance_type?: "cfg" | "apg" | "cfg_star"; /** * Lyric guidance scale for the generation. Default value: `1.5` */ lyric_guidance_scale?: number; /** * Lyrics to be sung in the audio. If not provided or if [inst] or [instrumental] is the content of this field, no lyrics will be sung. Use control structures like [verse], [chorus] and [bridge] to control the structure of the song. 
Default value: `""` */ lyrics?: string; /** * Minimum guidance scale for the generation after the decay. Default value: `3` */ minimum_guidance_scale?: number; /** * Number of steps to generate the audio. Default value: `27` */ number_of_steps?: number; /** * Original lyrics of the audio file. Default value: `""` */ original_lyrics?: string; /** * Original seed of the audio file. */ original_seed?: number; /** * Original tags of the audio file. */ original_tags: string; /** * Scheduler to use for the generation process. Default value: `"euler"` */ scheduler?: "euler" | "heun"; /** * Random seed for reproducibility. If not provided, a random seed will be used. */ seed?: number; /** * Tag guidance scale for the generation. Default value: `5` */ tag_guidance_scale?: number; /** * Comma-separated list of genre tags to control the style of the generated audio. */ tags: string; }; /** Input parameters for generating audio from genre tags and optional lyrics. */ export type AceStepInput = { /** * The duration of the generated audio in seconds. Default value: `60` */ duration?: number; /** * Granularity scale for the generation process. Higher values can reduce artifacts. Default value: `10` */ granularity_scale?: number; /** * Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps) Default value: `0.5` */ guidance_interval?: number; /** * Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to minimum_guidance_scale in the interval. 0.0 means no decay. */ guidance_interval_decay?: number; /** * Guidance scale for the generation. Default value: `15` */ guidance_scale?: number; /** * Type of CFG to use for the generation process. Default value: `"apg"` */ guidance_type?: "cfg" | "apg" | "cfg_star"; /** * Lyric guidance scale for the generation. Default value: `1.5` */ lyric_guidance_scale?: number; /** * Lyrics to be sung in the audio. If not provided or if [inst] or [instrumental] is the content of this field, no lyrics will be sung. 
Use control structures like [verse], [chorus] and [bridge] to control the structure of the song. Default value: `""` */ lyrics?: string; /** * Minimum guidance scale for the generation after the decay. Default value: `3` */ minimum_guidance_scale?: number; /** * Number of steps to generate the audio. Default value: `27` */ number_of_steps?: number; /** * Scheduler to use for the generation process. Default value: `"euler"` */ scheduler?: "euler" | "heun"; /** * Random seed for reproducibility. If not provided, a random seed will be used. */ seed?: number; /** * Tag guidance scale for the generation. Default value: `5` */ tag_guidance_scale?: number; /** * Comma-separated list of genre tags to control the style of the generated audio. */ tags: string; }; /** Generated audio together with the lyrics, seed and tags used to produce it. */ export type AceStepOutput = { /** * The generated audio file. */ audio: File; /** * The lyrics used in the generation process. */ lyrics: string; /** * The random seed used for the generation process. */ seed: number; /** * The genre tags used in the generation process. */ tags: string; }; /** Input parameters for generating audio from a free-form prompt. */ export type AceStepPromptToAudioInput = { /** * The duration of the generated audio in seconds. Default value: `60` */ duration?: number; /** * Granularity scale for the generation process. Higher values can reduce artifacts. Default value: `10` */ granularity_scale?: number; /** * Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps) Default value: `0.5` */ guidance_interval?: number; /** * Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to minimum_guidance_scale in the interval. 0.0 means no decay. */ guidance_interval_decay?: number; /** * Guidance scale for the generation. Default value: `15` */ guidance_scale?: number; /** * Type of CFG to use for the generation process. Default value: `"apg"` */ guidance_type?: "cfg" | "apg" | "cfg_star"; /** * Whether to generate an instrumental version of the audio. 
*/ instrumental?: boolean; /** * Lyric guidance scale for the generation. Default value: `1.5` */ lyric_guidance_scale?: number; /** * Minimum guidance scale for the generation after the decay. Default value: `3` */ minimum_guidance_scale?: number; /** * Number of steps to generate the audio. Default value: `27` */ number_of_steps?: number; /** * Prompt to control the style of the generated audio. This will be used to generate tags and lyrics. */ prompt: string; /** * Scheduler to use for the generation process. Default value: `"euler"` */ scheduler?: "euler" | "heun"; /** * Random seed for reproducibility. If not provided, a random seed will be used. */ seed?: number; /** * Tag guidance scale for the generation. Default value: `5` */ tag_guidance_scale?: number; }; /** Input parameters for rendering styled subtitles onto a video. */ export type AddSubtitlesToVideoInput = { /** * Background box color for subtitle text (use 'none' for no background, TikTok style uses no background) Default value: `"none"` */ background_color?: "black" | "white" | "red" | "green" | "blue" | "yellow" | "orange" | "purple" | "pink" | "brown" | "gray" | "cyan" | "magenta" | "transparent" | "none"; /** * Opacity of subtitle background box if used (0.0-1.0, 0.0=transparent) */ background_opacity?: number; /** * Subtitle text color Default value: `"white"` */ font_color?: "white" | "black" | "red" | "green" | "blue" | "yellow" | "orange" | "purple" | "pink" | "brown" | "gray" | "cyan" | "magenta"; /** * Any Google Font name from fonts.google.com (e.g., 'Montserrat', 'Poppins', 'BBH Sans Hegarty'). TikTok commonly uses bold sans-serif fonts. 
Default value: `"Montserrat"` */ font_name?: string; /** * Font size for subtitles (TikTok style uses larger, bold text) Default value: `70` */ font_size?: number; /** * Font weight (TikTok style typically uses bold or black) Default value: `"bold"` */ font_weight?: "normal" | "bold" | "black"; /** * Vertical position of subtitles on screen Default value: `"bottom"` */ position?: "bottom" | "top" | "center"; /** * Text stroke/outline color Default value: `"black"` */ stroke_color?: "black" | "white" | "red" | "green" | "blue" | "yellow" | "orange" | "purple" | "pink" | "brown" | "gray" | "cyan" | "magenta"; /** * Text stroke/outline width in pixels (0 for no stroke, TikTok uses 2-4) Default value: `3` */ stroke_width?: number; /* NOTE(review): the segment element type is not visible here; typed as `unknown` so the declaration compiles (bare `Array` is TS2314) - replace with the real segment type once confirmed. */ /** * List of subtitle segments. Click + to add more subtitle segments. */ subtitles: Array<unknown>; /** * URL of the video file to add subtitles to */ video_url: string | Blob | File; /** * Vertical offset from position in pixels (-500 to 500, positive moves down) */ y_offset?: number; }; /** Input parameters for drawing a text overlay onto an image. */ export type AddTextToImageInput = { /** * Text anchor point Default value: `"center"` */ anchor?: "left" | "center" | "right"; /** * Optional background color for text box (use 'none' or leave empty for no background) */ background_color?: "black" | "white" | "red" | "green" | "blue" | "yellow" | "orange" | "purple" | "pink" | "brown" | "gray" | "cyan" | "magenta" | "transparent" | "none"; /** * Padding around text in pixels (only if background_color is set) Default value: `10` */ background_padding?: number; /** * Text color Default value: `"white"` */ font_color?: "white" | "black" | "red" | "green" | "blue" | "yellow" | "orange" | "purple" | "pink" | "brown" | "gray" | "cyan" | "magenta"; /** * Font size in pixels Default value: `40` */ font_size?: number; /** * The URL of the image to add text to */ image_url: string | Blob | File; /** * Output format for the result image Default value: `"png"` */ output_format?: "png" | "jpg" | "jpeg" | "webp"; /** * Color of 
text stroke/outline Default value: `"black"` */ stroke_color?: "black" | "white" | "red" | "green" | "blue" | "yellow" | "orange" | "purple" | "pink" | "brown" | "gray" | "cyan" | "magenta"; /** * Width of text outline/stroke in pixels (0 for no stroke) */ stroke_width?: number; /** * Text to add to the image */ text: string; /** * X position as percentage of image width (0-100) Default value: `50` */ x_percent?: number; /** * Y position as percentage of image height (0-100) Default value: `50` */ y_percent?: number; }; /** Advanced training input: training data, trigger phrase, and optional staging and face-handling settings. */ export type AdvancedInput = { /** * Default caption to use if no caption is found for a media file. */ default_caption?: string; /** * The multiplier for the learning rate for the high resolution training stage. This is useful when you want to train the high resolution stage with a different learning rate than the low resolution stage. Default value: `2.5` */ hires_lr_multiplier?: number; /** * Whether to include synthetic captions. */ include_synthetic_captions?: boolean; /** * The percentage of the image to use for low resolution training. 0.0 means no low resolution training, 1.0 means full low resolution training. Default value: `0.3` */ low_res_percentage?: number; /** * URL to the training data. */ training_data_url: string | Blob | File; /* NOTE(review): the stage element type is not visible here; typed as `unknown` so the declaration compiles (bare `Array` is TS2314) - replace with the real stage type once confirmed. */ /** * List of training stages. Each stage can have different parameters. */ training_stages?: Array<unknown>; /** * Trigger phrase for the model. */ trigger_phrase: string; /** * Whether to use face cropping for the training data. When enabled, images will be cropped to the face before resizing. */ use_face_cropping?: boolean; /** * Whether to use face detection for the training data. When enabled, images will use the center of the face as the center of the image when resizing. Default value: `true` */ use_face_detection?: boolean; /** * Whether to use masks for the training data. Default value: `true` */ use_masks?: boolean; }; export type AdvancedLipsyncCreateTaskInput = { /** * Specified Face for Lip-Sync. 
Includes Face ID, lip movement reference data, etc. Currently only supports one person lip-sync. */ face_choose: FaceChoice; /** * The session id of the lip-sync task */ session_id: string; }; /** Result of an advanced lip-sync task. */ export type AdvancedLipsyncOutput = { /* NOTE(review): element type assumed to be `File`, matching the other generated-media outputs in this file (e.g. `AIAvatarOutput.video`); bare `Array` does not compile (TS2314) - confirm against the endpoint schema. */ /** * The generated lip-sync video */ videos: Array<File>; }; export type Aesthetics = { /** * The aesthetic score of the image (e.g., 'very high', 'high', 'medium', 'low'). */ aesthetic_score?: string; /** * The color scheme of the image to be generated. */ color_scheme?: string; /** * The composition of the image to be generated. */ composition?: string; /** * The mood and atmosphere of the image to be generated. */ mood_atmosphere?: string; /** * The preference score of the image (e.g., 'very high', 'high', 'medium', 'low'). */ preference_score?: string; }; export type AgeModifyInput = { /** * Aspect ratio for 4K output (default: 3:4 for portraits) */ aspect_ratio?: AspectRatio; /** * Portrait image URL for age modification */ image_url: string | Blob | File; /** * Default value: `true` */ preserve_identity?: boolean; /** * Default value: `30` */ target_age?: number; }; export type AIAvatarInput = { /** * The URL of the audio file. */ audio_url: string | Blob | File; /** * The URL of the image to use as your avatar */ image_url: string | Blob | File; /** * The prompt to use for the video generation. Default value: `"."` */ prompt?: string; }; export type AiAvatarMultiInput = { /** * The acceleration level to use for generation. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * The URL of the Person 1 audio file. */ first_audio_url: string | Blob | File; /** * URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped. */ image_url: string | Blob | File; /** * Number of frames to generate. Must be between 81 to 129 (inclusive). If the number of frames is greater than 81, the video will be generated with 1.25x more billing units. 
Default value: `181` */ /* NOTE(review): the documented default (181) lies outside the stated 81-129 range - confirm against the endpoint schema. */ num_frames?: number; /** * The text prompt to guide video generation. */ prompt: string; /** * Resolution of the video to generate. Must be either 480p or 720p. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * The URL of the Person 2 audio file. */ second_audio_url?: string | Blob | File; /** * Random seed for reproducibility. If None, a random seed is chosen. Default value: `81` */ seed?: number; /** * Whether to use only the first audio file. */ use_only_first_audio?: boolean; }; /** Input parameters for a two-speaker avatar video whose speech is generated from text. */ export type AiAvatarMultiTextInput = { /** * The acceleration level to use for generation. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * The text input to guide video generation. */ first_text_input: string; /** * URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped. */ image_url: string | Blob | File; /* NOTE(review): the documented default (191) lies outside the stated 81-129 range - confirm against the endpoint schema. */ /** * Number of frames to generate. Must be between 81 to 129 (inclusive). If the number of frames is greater than 81, the video will be generated with 1.25x more billing units. Default value: `191` */ num_frames?: number; /** * The text prompt to guide video generation. */ prompt: string; /** * Resolution of the video to generate. Must be either 480p or 720p. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * The text input to guide video generation. */ second_text_input: string; /** * Random seed for reproducibility. If None, a random seed is chosen. 
Default value: `81` */ seed?: number; /** * The first person's voice to use for speech generation Default value: `"Sarah"` */ voice1?: "Aria" | "Roger" | "Sarah" | "Laura" | "Charlie" | "George" | "Callum" | "River" | "Liam" | "Charlotte" | "Alice" | "Matilda" | "Will" | "Jessica" | "Eric" | "Chris" | "Brian" | "Daniel" | "Lily" | "Bill"; /** * The second person's voice to use for speech generation Default value: `"Roger"` */ voice2?: "Aria" | "Roger" | "Sarah" | "Laura" | "Charlie" | "George" | "Callum" | "River" | "Liam" | "Charlotte" | "Alice" | "Matilda" | "Will" | "Jessica" | "Eric" | "Chris" | "Brian" | "Daniel" | "Lily" | "Bill"; }; /** Generated avatar video and its duration. */ export type AIAvatarOutput = { /** * Duration of the output video in seconds. */ duration: number; /** * The generated video */ video: File; }; /** Input parameters for a single-speaker avatar video whose speech is generated from text. */ export type AiAvatarSingleTextInput = { /** * The acceleration level to use for generation. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped. */ image_url: string | Blob | File; /* NOTE(review): the documented default (136) lies outside the stated 81-129 range - confirm against the endpoint schema. */ /** * Number of frames to generate. Must be between 81 to 129 (inclusive). If the number of frames is greater than 81, the video will be generated with 1.25x more billing units. Default value: `136` */ num_frames?: number; /** * The text prompt to guide video generation. */ prompt: string; /** * Resolution of the video to generate. Must be either 480p or 720p. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * Random seed for reproducibility. If None, a random seed is chosen. Default value: `42` */ seed?: number; /** * The text input to guide video generation. 
*/ text_input: string; /** * The voice to use for speech generation */ voice: "Aria" | "Roger" | "Sarah" | "Laura" | "Charlie" | "George" | "Callum" | "River" | "Liam" | "Charlotte" | "Alice" | "Matilda" | "Will" | "Jessica" | "Eric" | "Chris" | "Brian" | "Daniel" | "Lily" | "Bill"; }; /** Input parameters for deblurring an image and restoring it to all-in-focus. */ export type AllInFocusInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The URL of the image to deblur and restore to all-in-focus. */ image_url: string | Blob | File; /** * The number of inference steps to perform. Lower values are faster but may reduce quality. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The same seed and the same input given to the same version of the model will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /* NOTE(review): the description says the original resolution is used when unset, yet a default of 512 is also listed - confirm which applies. */ /** * Resize the image so the longer side matches this value (in pixels). If not set, the original resolution is used (aligned to 16px). Recommended range: 512 to 2000. Default value: `512` */ target_long_side?: number; }; /** Input parameters for mixing several audio files into one. */ export type AmixAudioInput = { /* NOTE(review): element type assumed to follow this file's convention for URL inputs (`string | Blob | File`); bare `Array` does not compile (TS2314) - confirm against the endpoint schema. */ /** * List of audio file URLs to mix together */ audio_urls: Array<string | Blob | File>; /** * Transition time in seconds for volume renormalization when an input ends Default value: `2` */ dropout_transition?: number; /** * How to determine output duration Default value: `"longest"` */ duration?: "longest" | "shortest" | "first"; /** * Always scale inputs instead of only doing summation. Prevents clipping Default value: `true` */ normalize?: boolean; /** * Optional weights for each audio input. If fewer weights than inputs, last weight applies to remaining. 
Default is 1.0 for all */ /* NOTE(review): typed as numbers because the description gives a numeric default (1.0); bare `Array` does not compile (TS2314). */ weights?: Array<number>; }; /** Input parameters for interpolating between a list of frames. */ export type AMTFrameInterpolationInput = { /* NOTE(review): the frame element type is not visible here; typed as `unknown` so the declaration compiles (bare `Array` is TS2314) - replace with the real frame type once confirmed. */ /** * Frames to interpolate */ frames: Array<unknown>; /** * Output frames per second Default value: `24` */ output_fps?: number; /** * Number of recursive interpolation passes Default value: `4` */ recursive_interpolation_passes?: number; }; /** Input parameters for frame-interpolating a video. */ export type AmtInterpolationInput = { /** * Output frames per second Default value: `24` */ output_fps?: number; /** * Number of recursive interpolation passes Default value: `2` */ recursive_interpolation_passes?: number; /** * URL of the video to be processed */ video_url: string | Blob | File; }; export type AnimatediffSparsectrlLcmInput = { /** * The type of controlnet to use for generating the video. The controlnet determines how the video will be animated. Default value: `"scribble"` */ controlnet_type?: "scribble" | "rgb"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` */ guidance_scale?: number; /** * The URL of the first keyframe to use for the generation. */ keyframe_0_image_url?: string | Blob | File; /** * The frame index of the first keyframe to use for the generation. */ keyframe_0_index?: number; /** * The URL of the second keyframe to use for the generation. */ keyframe_1_image_url?: string | Blob | File; /** * The frame index of the second keyframe to use for the generation. */ keyframe_1_index?: number; /** * The URL of the third keyframe to use for the generation. */ keyframe_2_image_url?: string | Blob | File; /** * The frame index of the third keyframe to use for the generation. */ keyframe_2_index?: number; /** * The negative prompt to use. Use it to specify what you don't want. 
Default value: `""` */ negative_prompt?: string; /** * Increasing the amount of steps tells Stable Diffusion that it should take more steps to generate your final result which can increase the amount of detail in your image. Default value: `4` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion will output the same image every time. */ seed?: number; }; /** Input parameters for generating a video from a text prompt. */ export type AnimateDiffT2VInput = { /** * Number of frames per second to extract from the video. Default value: `8` */ fps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The motions to apply to the video. */ motions?: Array<"zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down">; /** * The negative prompt to use. Use it to address details that you don't want in the image. This could be colors, objects, scenery and even the small details (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` */ negative_prompt?: string; /** * The number of frames to generate for the video. Default value: `16` */ num_frames?: number; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The prompt to use for generating the video. Be as descriptive as possible for best results. */ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion will output the same image every time. */ seed?: number; /** * The size of the video to generate. 
Default value: `square` */ video_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; }; /** Input parameters for the turbo variant of text-to-video generation. */ export type AnimateDiffT2VTurboInput = { /** * Number of frames per second to extract from the video. Default value: `8` */ fps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** * The negative prompt to use. Use it to address details that you don't want in the image. This could be colors, objects, scenery and even the small details (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` */ negative_prompt?: string; /** * The number of frames to generate for the video. Default value: `16` */ num_frames?: number; /** * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` */ num_inference_steps?: number; /** * The prompt to use for generating the video. Be as descriptive as possible for best results. */ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion will output the same image every time. */ seed?: number; /** * The size of the video to generate. Default value: `"square"` */ video_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; }; /** Input parameters for animating an existing video guided by a prompt. */ export type AnimateDiffV2VInput = { /** * The first N number of seconds of video to animate. Default value: `3` */ first_n_seconds?: number; /** * Number of frames per second to extract from the video. Default value: `8` */ fps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The motions to apply to the video. 
*/ motions?: Array<"zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down">; /** * The negative prompt to use. Use it to address details that you don't want in the image. This could be colors, objects, scenery and even the small details (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` */ negative_prompt?: string; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion will output the same image every time. */ seed?: number; /** * The strength of the input video in the final output. Default value: `0.7` */ strength?: number; /** * URL of the video. */ video_url: string | Blob | File; }; /** Input parameters for the turbo variant of prompt-guided video animation. */ export type AnimateDiffV2VTurboInput = { /** * The first N number of seconds of video to animate. Default value: `3` */ first_n_seconds?: number; /** * Number of frames per second to extract from the video. Default value: `8` */ fps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** * The negative prompt to use. Use it to address details that you don't want in the image. This could be colors, objects, scenery and even the small details (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` */ negative_prompt?: string; /** * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `12` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. 
*/ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion will output the same image every time. */ seed?: number; /** * The strength of the input video in the final output. Default value: `0.7` */ strength?: number; /** * URL of the video. */ video_url: string | Blob | File; }; /* NOTE(review): the element types of `inputs` and `values` in the Arbiter* types below are not visible here; they are typed as `unknown` so the declarations compile (bare `Array` is TS2314) - replace with the real types once confirmed. */ export type ArbiterImageImageInput = { /** * The inputs to use for the measurement. */ inputs: Array<unknown>; /** * The measurements to use for the measurement. */ measurements: Array<"dists" | "mse" | "lpips" | "sdi" | "ssim">; }; export type ArbiterImageInput = { /** * The inputs to use for the measurement. */ inputs: Array<unknown>; /** * The measurements to use for the measurement. */ measurements: Array<"arniqa" | "clip_iqa" | "musiq" | "nima" | "lapvar">; }; export type ArbiterImageOutput = { /** * The values of the measurements. */ values?: Array<unknown>; }; export type ArbiterImageTextInput = { /** * The inputs to use for the measurement. */ inputs: Array<unknown>; /** * The measurements to use for the measurement. 
*/ /* NOTE(review): typed as `string` because the sibling Arbiter inputs use string measurement names; the exact literal union is not visible here (bare `Array` is TS2314) - narrow once confirmed. */ measurements: Array<string>; }; export type AspectRatio = { /** * Aspect ratio for 4K resolution output Default value: `"1:1"` */ ratio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4"; }; /** Metadata describing an audio media file and where it can be accessed. */ export type Audio = { /** * Overall bitrate of the media in bits per second */ bitrate: number; /** * Number of audio channels */ channels: number; /** * Codec used to encode the media */ codec: string; /** * Container format of the media file (e.g., 'mp4', 'mov') */ container: string; /** * MIME type of the media file */ content_type: string; /** * Duration of the media in seconds */ duration: number; /** * Original filename of the media */ file_name: string; /** * Size of the file in bytes */ file_size: number; /** * Type of media (always 'audio') Default value: `"audio"` */ media_type?: string; /** * Audio sample rate in Hz */ sample_rate: number; /** * URL where the media file can be accessed */ url: string; }; export type Audio2VideoInput = { /** * The input audio, given as a URL, Blob or File. NOTE(review): the original description was empty; presumably this is the audio the avatar video is generated from - confirm. */ audio_url: string | Blob | File; /** * The avatar to use for the video */ avatar_id: "emily_vertical_primary" | "emily_vertical_secondary" | "marcus_vertical_primary" | "marcus_vertical_secondary" | "mira_vertical_primary" | "mira_vertical_secondary" | "jasmine_vertical_primary" | "jasmine_vertical_secondary" | "jasmine_vertical_walking" | "aisha_vertical_walking" | "elena_vertical_primary" | "elena_vertical_secondary" | "any_male_vertical_primary" | "any_female_vertical_primary" | "any_male_vertical_secondary" | "any_female_vertical_secondary" | "any_female_vertical_walking" | "emily_primary" | "emily_side" | "marcus_primary" | "marcus_side" | "aisha_walking" | "elena_primary" | "elena_side" | "any_male_primary" | "any_female_primary" | "any_male_side" | "any_female_side"; }; export type AudioCompressorInput = { /** * Attack time in milliseconds (how fast compression starts) Default value: `5` */ attack?: number; /** * URL of the audio file to compress */ audio_url: string | Blob | File; /** * Knee width in dB for soft knee compression (0 
= hard knee) Default value: `2.83`
   */
  knee?: number;
  /** Makeup gain in dB to compensate for volume reduction. Default value: `8` */
  makeup?: number;
  /** Output audio bitrate. Default value: `"192k"` */
  output_bitrate?: "128k" | "192k" | "256k" | "320k";
  /** Compression ratio (1 = no compression, higher = more compression). Default value: `3` */
  ratio?: number;
  /** Release time in milliseconds (how fast compression stops). Default value: `50` */
  release?: number;
  /** Threshold level in dB above which compression is applied (-60 to 0). Default value: `-18` */
  threshold?: number;
};

/** Input parameters for equalizing an audio file. */
export type AudioEqualizerInput = {
  /** URL of the audio file to equalize. */
  audio_url: string | Blob | File;
  /**
   * List of EQ bands to apply. Each band has frequency, width, and gain.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not
   * compile), so elements are typed `unknown` until the band type is confirmed.
   */
  bands: Array<unknown>;
  /** Output audio bitrate. Default value: `"192k"` */
  output_bitrate?: "128k" | "192k" | "256k" | "320k";
};

/** Reference to an audio file together with optional metadata. */
export type AudioFile = {
  /** The bitrate of the audio (e.g., '192k' or 192000). */
  bitrate?: string | number;
  /** The number of channels in the audio. */
  channels?: number;
  /** The mime type of the file. */
  content_type?: string;
  /** The duration of the audio. */
  duration?: number;
  /** The name of the file. It will be auto-generated if not provided. */
  file_name?: string;
  /** The size of the file in bytes. */
  file_size?: number;
  /** The sample rate of the audio. */
  sample_rate?: number;
  /** The URL where the file can be downloaded from. */
  url: string;
};

/** Input parameters for prompting a model about an audio file. */
export type AudioInput = {
  /**
   * URL or data URI of the audio file to process.
   * Supported formats: wav, mp3, aiff, aac, ogg, flac, m4a.
   */
  audio_url: string | Blob | File;
  /**
   * Upper limit for the number of tokens the model can generate in response;
   * it won't produce more than this limit. The maximum value is the context
   * length minus the prompt length.
   */
  max_tokens?: number;
  /**
   * Name of the model to use. Charged based on actual token usage.
*/
  model: string;
  /** Prompt to be used for the audio processing. */
  prompt: string;
  /** Should reasoning be the part of the final answer. */
  reasoning?: boolean;
  /** System prompt to provide context or instructions to the model. */
  system_prompt?: string;
  /**
   * Influences the variety in the model's responses. Lower values lead to more
   * predictable and typical responses, while higher values encourage more
   * diverse and less common responses. At 0, the model always gives the same
   * response for a given input. Default value: `1`
   */
  temperature?: number;
};

/** Result of an audio processing request. */
export type AudioOutput = {
  /** Generated output from audio processing. */
  output: string;
  /** Token usage information. */
  usage: UsageInfo;
};

/** Encoding settings for generated audio. */
export type AudioSetting = {
  /** Bitrate of generated audio. Default value: `"128000"` */
  bitrate?: "32000" | "64000" | "128000" | "256000";
  /** Number of audio channels (1=mono, 2=stereo). Default value: `"1"` */
  channel?: "1" | "2";
  /** Audio format. Default value: `"mp3"` */
  format?: "mp3" | "pcm" | "flac";
  /** Sample rate of generated audio. Default value: `"32000"` */
  sample_rate?: "8000" | "16000" | "22050" | "24000" | "32000" | "44100";
};

/** A time span within an audio clip. */
export type AudioTimeSpan = {
  /** End time of the span in seconds. */
  end: number;
  /** Whether to include (True) or exclude (False) sounds in this span. Default value: `true` */
  include?: boolean;
  /** Start time of the span in seconds. */
  start: number;
};

/** Input parameters for transforming one audio clip into another. */
export type AudioToAudioInput = {
  /** The audio clip to transform. */
  audio_url: string | Blob | File;
  /**
   * How strictly the diffusion process adheres to the prompt text (higher values make your audio closer to your prompt).
Default value: `1`
   */
  guidance_scale?: number;
  /** The number of steps to denoise the audio for. Default value: `8` */
  num_inference_steps?: number;
  /** The prompt to guide the audio generation. */
  prompt: string;
  /**
   * NOTE(review): undocumented upstream; presumably a random seed for
   * reproducible output. Confirm against the API reference.
   */
  seed?: number;
  /**
   * Sometimes referred to as denoising, this parameter controls how much
   * influence the `audio_url` parameter has on the generated audio. A value of
   * 0 would yield audio that is identical to the input. A value of 1 would be
   * as if you passed in no audio at all. Default value: `0.8`
   */
  strength?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
  /**
   * The duration of the audio clip to generate. If not provided, it will be
   * set to the duration of the input audio.
   */
  total_seconds?: number;
};

/** Technical properties of an audio track. */
export type AudioTrack = {
  /** Audio bitrate in bits per second. */
  bitrate: number;
  /** Number of audio channels. */
  channels: number;
  /** Audio codec used (e.g., 'aac', 'mp3'). */
  codec: string;
  /** Audio sample rate in Hz. */
  sample_rate: number;
};

/** Input parameters for asking a question about an audio file. */
export type AudioUnderstandingInput = {
  /** URL of the audio file to analyze. */
  audio_url: string | Blob | File;
  /** Whether to request a more detailed analysis of the audio. */
  detailed_analysis?: boolean;
  /** The question or prompt about the audio content. */
  prompt: string;
};

/** Input parameters for adjusting the volume of an audio file. */
export type AudioVolumeInput = {
  /** URL of the audio file to process. */
  audio_url: string | Blob | File;
  /** Fade-in duration in seconds from the start of the audio. None = no fade-in. */
  fade_in?: number;
  /** Fade-out duration in seconds before the end of the audio. None = no fade-out. */
  fade_out?: number;
  /**
   * Output audio format. If not provided, the format is determined automatically.
*/
  output_format?: "mp3" | "wav" | "aac" | "flac";
  /** Volume multiplier (0.0 = silent, 1.0 = unchanged, 2.0 = double volume). Default value: `1` */
  volume?: number;
};

/** Result of a volume adjustment. */
export type AudioVolumeOutput = {
  /** The processed audio file. */
  audio: File;
  /** Audio duration in seconds. */
  duration_seconds: number;
};

/** Input parameters for AuraFlow image generation. */
export type AuraFlowInput = {
  /** Whether to perform prompt expansion (recommended). Default value: `true` */
  expand_prompt?: boolean;
  /** Classifier free guidance scale. Default value: `3.5` */
  guidance_scale?: number;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to take. Default value: `50` */
  num_inference_steps?: number;
  /** The prompt to generate images from. */
  prompt: string;
  /** The seed to use for generating images. */
  seed?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Result of AuraFlow image generation. */
export type AuraFlowOutput = {
  /**
   * The generated images.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not
   * compile); `Image` is assumed because the same file uses it for other
   * image outputs. Confirm against the API schema.
   */
  images: Array<Image>;
  /** The expanded prompt. */
  prompt: string;
  /** The seed used to generate the images. */
  seed: number;
};

/** Input parameters for AuraSR image upscaling. */
export type AuraSrInput = {
  /** Checkpoint to use for upscaling. More coming soon. Default value: `"v1"` */
  checkpoint?: "v1" | "v2";
  /** URL of the image to upscale. */
  image_url: string | Blob | File;
  /**
   * Whether to use overlapping tiles for upscaling. Setting this to true helps
   * remove seams but doubles the inference time.
   */
  overlapping_tiles?: boolean;
  /** Upscaling factor. More coming soon. Default value: `4` */
  upscale_factor?: number;
};

/** Result of AuraSR image upscaling. */
export type AuraSrOutput = {
  /** Upscaled image. */
  image: Image;
  /** Timings for each step in the pipeline. */
  timings: unknown;
};

/** Input parameters for burning automatic captions into a video. */
export type AutoCaptionInput = {
  /** Size of text in generated captions. Default value: `24` */
  font_size?: number;
  /**
   * Left-to-right alignment of the text.
Can be a string ('left', 'center', 'right') or a float (0.0-1.0) Default value: `center`
   */
  left_align?: string | number;
  /**
   * Number of seconds the captions should stay on screen. A higher number will
   * also result in more text being displayed at once. Default value: `1.5`
   */
  refresh_interval?: number;
  /** Width of the text strokes in pixels. Default value: `1` */
  stroke_width?: number;
  /**
   * Top-to-bottom alignment of the text. Can be a string ('top', 'center',
   * 'bottom') or a float (0.0-1.0). Default value: `center`
   */
  top_align?: string | number;
  /**
   * Colour of the text. Can be a RGB tuple, a color name, or an hexadecimal
   * notation. Default value: `"white"`
   */
  txt_color?: string;
  /**
   * Font for generated captions. Choose one in 'Arial','Standard','Garamond',
   * 'Times New Roman','Georgia', or pass a url to a .ttf file.
   * Default value: `"Standard"`
   */
  txt_font?: string;
  /** URL to the .mp4 video with audio. Only videos of size <400MB are allowed. */
  video_url: string | Blob | File;
};

/** Result of automatic captioning. */
export type AutoCaptionOutput = {
  /**
   * URL to the caption .mp4 video.
*/
  video_url: string | Blob | File;
};

/** Input parameters for adding automatic subtitles to a video. */
export type AutoSubtitleInput = {
  /**
   * Background color behind text ('none' or 'transparent' for no background).
   * Default value: `"none"`
   */
  background_color?:
    | "black"
    | "white"
    | "red"
    | "green"
    | "blue"
    | "yellow"
    | "orange"
    | "purple"
    | "pink"
    | "brown"
    | "gray"
    | "cyan"
    | "magenta"
    | "none"
    | "transparent";
  /** Background opacity (0.0 = fully transparent, 1.0 = fully opaque). */
  background_opacity?: number;
  /** Enable animation effects for subtitles (bounce style entrance). Default value: `true` */
  enable_animation?: boolean;
  /** Subtitle text color for non-active words. Default value: `"white"` */
  font_color?:
    | "white"
    | "black"
    | "red"
    | "green"
    | "blue"
    | "yellow"
    | "orange"
    | "purple"
    | "pink"
    | "brown"
    | "gray"
    | "cyan"
    | "magenta";
  /**
   * Any Google Font name from fonts.google.com (e.g., 'Montserrat', 'Poppins',
   * 'BBH Sans Hegarty'). Default value: `"Montserrat"`
   */
  font_name?: string;
  /** Font size for subtitles (TikTok style uses larger text). Default value: `100` */
  font_size?: number;
  /** Font weight (TikTok style typically uses bold or black). Default value: `"bold"` */
  font_weight?: "normal" | "bold" | "black";
  /** Color for the currently speaking word (karaoke-style highlight). Default value: `"purple"` */
  highlight_color?:
    | "white"
    | "black"
    | "red"
    | "green"
    | "blue"
    | "yellow"
    | "orange"
    | "purple"
    | "pink"
    | "brown"
    | "gray"
    | "cyan"
    | "magenta";
  /**
   * Language code for transcription (e.g., 'en', 'es', 'fr', 'de', 'it', 'pt',
   * 'nl', 'ja', 'zh', 'ko') or 3-letter ISO code (e.g., 'eng', 'spa', 'fra').
   * Default value: `"en"`
   */
  language?: string;
  /** Vertical position of subtitles. Default value: `"bottom"` */
  position?: "top" | "center" | "bottom";
  /** Text stroke/outline color. Default value: `"black"` */
  stroke_color?:
    | "black"
    | "white"
    | "red"
    | "green"
    | "blue"
    | "yellow"
    | "orange"
    | "purple"
    | "pink"
    | "brown"
    | "gray"
    | "cyan"
    | "magenta";
  /**
   * Text stroke/outline width in pixels (0 for no stroke)
Default value: `3`
   */
  stroke_width?: number;
  /** URL of the video file to add automatic subtitles to. */
  video_url: string | Blob | File;
  /**
   * Maximum number of words per subtitle segment. Use 1 for single-word
   * display, 2-3 for short phrases, or 8-12 for full sentences.
   * Default value: `3`
   */
  words_per_subtitle?: number;
  /** Vertical offset in pixels (positive = move down, negative = move up). Default value: `75` */
  y_offset?: number;
};

/** Result of automatic subtitling. */
export type AutoSubtitleOutput = {
  /** Number of subtitle segments generated. */
  subtitle_count: number;
  /** Full transcription text. */
  transcription: string;
  /** Additional transcription metadata from ElevenLabs (language, segments, etc.). */
  transcription_metadata?: unknown;
  /** The video with automatic subtitles. */
  video: File;
  /**
   * Word-level timing information from transcription service.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not
   * compile), so elements are typed `unknown`, like `transcription_metadata`,
   * until the word-timing shape is confirmed.
   */
  words?: Array<unknown>;
};

/** Background configuration for an avatar video. */
export type AvatarIVBackground = {
  /** Type of background. Default value: `"color"` */
  type?: "color" | "image" | "video";
  /**
   * Background value - hex color for 'color' type, URL for 'image' or 'video'
   * type. Default value: `"#FFFFFF"`
   */
  value?: string;
};

/** Input parameters for transforming an image into a baby version. */
export type BabyVersionInput = {
  /** The aspect ratio of the generated image. */
  aspect_ratio?: "21:9" | "16:9" | "4:3" | "3:2" | "1:1" | "2:3" | "3:4" | "9:16" | "9:21";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you
   * want the model to stick to your prompt when looking for a related image to
   * show you. Default value: `3.5`
   */
  guidance_scale?: number;
  /** URL of the image to transform into a baby version. */
  image_url: string | Blob | File;
  /** Number of inference steps for sampling. Default value: `30` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"jpeg"` */
  output_format?: "jpeg" | "png";
  /**
   * The safety tolerance level for the generated image. 1 being the most strict and 6 being the most permissive.
Default value: `"2"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Input parameters for background removal. */
export type BackgroundRemovalInput = {
  /** Input image (JPEG or PNG). */
  image_url: string | Blob | File;
  /** Output format. Default value: `"rgba"` */
  output_format?: "rgba" | "alpha" | "zip";
  /**
   * When true, return result as a data URL instead of uploading to storage.
   * Default value: `true`
   */
  sync_mode?: boolean;
};

/** Result of background removal. */
export type BackgroundRemovalOutput = {
  /** Result file (for zip output format). */
  file?: File;
  /** Result image (for rgba/alpha output formats). */
  image?: Image;
};

/** Input parameters for prompt-driven image editing with Bagel. */
export type BagelEditInput = {
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** The image to edit. */
  image_url: string | Blob | File;
  /** The prompt to edit the image with. */
  prompt: string;
  /** The seed to use for the generation. */
  seed?: number;
  /**
   * Whether to use thought tokens for generation. If set to true, the model
   * will "think" to potentially improve generation quality. Increases
   * generation time and increases the cost by 20%.
   */
  use_thought?: boolean;
};

/** Input parameters for prompt-driven image generation with Bagel. */
export type bagelInput = {
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** The prompt to generate an image from. */
  prompt: string;
  /** The seed to use for the generation. */
  seed?: number;
  /**
   * Whether to use thought tokens for generation. If set to true, the model
   * will "think" to potentially improve generation quality. Increases
   * generation time and increases the cost by 20%.
   */
  use_thought?: boolean;
};

/** Input parameters for querying an image with Bagel. */
export type BagelUnderstandInput = {
  /**
   * The image for the query.
*/
  image_url: string | Blob | File;
  /** The prompt to query the image with. */
  prompt: string;
  /** The seed to use for the generation. */
  seed?: number;
};

/** Result of querying an image with Bagel. */
export type BagelUnderstandOutput = {
  /** The query used for the generation. */
  prompt: string;
  /** The seed used for the generation. */
  seed: number;
  /** The answer to the query. */
  text: string;
  /** The timings of the generation. */
  timings: unknown;
};

/** Input parameters for image-to-image generation. */
export type BaseImageToInput = {
  /**
   * The speed of the generation. The higher the speed, the faster the
   * generation. Default value: `"none"`
   */
  acceleration?: "none" | "regular" | "high";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show
   * you. Default value: `3.5`
   */
  guidance_scale?: number;
  /** The URL of the image to generate an image from. */
  image_url: string | Blob | File;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `40` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"jpeg"` */
  output_format?: "jpeg" | "png";
  /** The prompt to generate an image from. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * The strength of the initial image. Higher strength values are better for
   * this model. Default value: `0.95`
   */
  strength?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Input parameters for generating audio for a video. */
export type BaseInput = {
  /** The strength of Classifier Free Guidance. Default value: `4.5` */
  cfg_strength?: number;
  /**
   * The duration of the audio to generate.
Default value: `8`
   */
  duration?: number;
  /** Whether to mask away the clip. */
  mask_away_clip?: boolean;
  /** The negative prompt to generate the audio for. Default value: `""` */
  negative_prompt?: string;
  /** The number of steps to generate the audio for. Default value: `25` */
  num_steps?: number;
  /** The prompt to generate the audio for. */
  prompt: string;
  /** The seed for the random number generator. */
  seed?: number;
  /** The URL of the video to generate the audio for. */
  video_url: string | Blob | File;
};

/** Input parameters for Kontext image editing. */
export type BaseKontextEditInput = {
  /**
   * The speed of the generation. The higher the speed, the faster the
   * generation. Default value: `"none"`
   */
  acceleration?: "none" | "regular" | "high";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show
   * you. Default value: `2.5`
   */
  guidance_scale?: number;
  /**
   * The URL of the image to edit.
   *
   * Max width: 14142px, Max height: 14142px, Timeout: 20s
   */
  image_url: string | Blob | File;
  /**
   * The LoRAs to use for the image generation. You can use any number of LoRAs
   * and they will be merged together to generate the final image.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not
   * compile), so elements are typed `unknown` until the LoRA descriptor type
   * is confirmed.
   */
  loras?: Array<unknown>;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `30` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"png"` */
  output_format?: "jpeg" | "png";
  /** The prompt to edit the image. */
  prompt: string;
  /**
   * Determines how the output resolution is set for image editing.
   * - `auto`: The model selects an optimal resolution from a predefined set that best matches the input image's aspect ratio. This is the recommended setting for most use cases as it's what the model was trained on.
* - `match_input`: The model will attempt to use the same resolution as the input image. The resolution will be adjusted to be compatible with the model's requirements (e.g. dimensions must be multiples of 16 and within supported limits).
   * Apart from these, a few aspect ratios are also supported. Default value: `"match_input"`
   */
  resolution_mode?:
    | "auto"
    | "match_input"
    | "1:1"
    | "16:9"
    | "21:9"
    | "3:2"
    | "2:3"
    | "4:5"
    | "5:4"
    | "3:4"
    | "4:3"
    | "9:16"
    | "9:21";
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Input parameters for Kontext image-to-image generation. */
export type BaseKontextImg2ImgInput = {
  /**
   * The speed of the generation. The higher the speed, the faster the
   * generation. Default value: `"none"`
   */
  acceleration?: "none" | "regular" | "high";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Whether to enhance the prompt for better results. */
  enhance_prompt?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show
   * you. Default value: `2.5`
   */
  guidance_scale?: number;
  /** The URL of the image for image-to-image. */
  image_url: string | Blob | File;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `30` */
  num_inference_steps?: number;
  /** Output format. Default value: `"jpeg"` */
  output_format?: "jpeg" | "png";
  /** The prompt for the image to image task. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * The strength of the initial image.
Higher strength values are better for this model. Default value: `0.88`
   */
  strength?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Input parameters for Kontext inpainting. */
export type BaseKontextInpaintInput = {
  /**
   * The speed of the generation. The higher the speed, the faster the
   * generation. Default value: `"none"`
   */
  acceleration?: "none" | "regular" | "high";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show
   * you. Default value: `2.5`
   */
  guidance_scale?: number;
  /** The URL of the image to be inpainted. */
  image_url: string | Blob | File;
  /**
   * The LoRAs to use for the image generation. You can use any number of LoRAs
   * and they will be merged together to generate the final image.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not
   * compile), so elements are typed `unknown` until the LoRA descriptor type
   * is confirmed.
   */
  loras?: Array<unknown>;
  /** The URL of the mask for inpainting. */
  mask_url: string | Blob | File;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `30` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"png"` */
  output_format?: "jpeg" | "png";
  /** The prompt for the image to image task. */
  prompt: string;
  /** The URL of the reference image for inpainting. */
  reference_image_url: string | Blob | File;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * The strength of the initial image. Higher strength values are better for
   * this model. Default value: `0.88`
   */
  strength?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
};

/** Input parameters for Kontext text-to-image generation. */
export type BaseKontextInput = {
  /**
   * The speed of the generation. The higher the speed, the faster the
   * generation. Default value: `"none"`
   */
  acceleration?: "none" | "regular" | "high";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Whether to enhance the prompt for better results. */
  enhance_prompt?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show
   * you. Default value: `2.5`
   */
  guidance_scale?: number;
  /** The size of the generated image. Default value: `landscape_4_3` */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `28` */
  num_inference_steps?: number;
  /** Output format. Default value: `"jpeg"` */
  output_format?: "jpeg" | "png";
  /** The prompt to generate an image from. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Input parameters for Qwen image-to-image editing. */
export type BaseQwenEditImg2ImgInput = {
  /**
   * Acceleration level for image generation. Options: 'none', 'regular'.
   * Higher acceleration increases speed. 'regular' balances speed and quality.
   * Default value: `"regular"`
   *
   * NOTE(review): the type also accepts `"high"`, which the description above
   * does not list.
   */
  acceleration?: "none" | "regular" | "high";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you.
Default value: `4`
   */
  guidance_scale?: number;
  /** The size of the generated image. */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** The URL of the image to edit. */
  image_url: string | Blob | File;
  /** The negative prompt for the generation. Default value: `" "` */
  negative_prompt?: string;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `30` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"png"` */
  output_format?: "jpeg" | "png";
  /** The prompt to generate the image with. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * Strength of the image-to-image transformation. Lower values preserve more
   * of the original image. Default value: `0.94`
   */
  strength?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Input parameters for Redux image generation from a source image. */
export type BaseReduxInput = {
  /**
   * The speed of the generation. The higher the speed, the faster the
   * generation. Default value: `"none"`
   */
  acceleration?: "none" | "regular" | "high";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show
   * you. Default value: `3.5`
   */
  guidance_scale?: number;
  /** The size of the generated image. Default value: `landscape_4_3` */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** The URL of the image to generate an image from. */
  image_url: string | Blob | File;
  /**
   * The number of images to generate.
Default value: `1`
   */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `28` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"jpeg"` */
  output_format?: "jpeg" | "png";
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Optional running and walking animation files. */
export type BasicAnimations = {
  /** Running armature only in GLB format. */
  running_armature_glb?: File;
  /** Running animation in FBX format. */
  running_fbx?: File;
  /** Running animation in GLB format. */
  running_glb?: File;
  /** Walking armature only in GLB format. */
  walking_armature_glb?: File;
  /** Walking animation in FBX format. */
  walking_fbx?: File;
  /** Walking animation in GLB format. */
  walking_glb?: File;
};

/** Result of batch captioning. */
export type BatchMoonDreamOutput = {
  /** URL to the generated captions JSON file containing filename-caption pairs. */
  captions_file: File;
  /**
   * List of generated captions.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not
   * compile); captions are assumed to be plain strings. Confirm against the
   * API schema.
   */
  outputs: Array<string>;
};

/** Input parameters for applying one prompt to a batch of images. */
export type BatchQueryInput = {
  /** List of image URLs to be processed (maximum 32 images). */
  images_data_url: string | Blob | File;
  /** Maximum number of tokens to generate. Default value: `64` */
  max_tokens?: number;
  /** Single prompt to apply to all images. */
  prompt: string;
};

/** Input parameters for removing objects selected by bounding box. */
export type BboxInput = {
  /**
   * List of bounding box coordinates to erase (only one box prompt is
   * supported).
   *
   * NOTE(review): the element type was missing (a bare `Array` does not
   * compile); `BBoxPromptBase` from this file is assumed. Confirm against the
   * API schema.
   */
  box_prompts?: Array<BBoxPromptBase>;
  /** The URL of the image to remove objects from. */
  image_url: string | Blob | File;
  /**
   * Amount of pixels to expand the mask by.
Range: 0-50 Default value: `15`
   */
  mask_expansion?: number;
  /** Default value: `"best_quality"` */
  model?: "low_quality" | "medium_quality" | "high_quality" | "best_quality";
};

/** Bounding-box prompt expressed in normalized (0-1) coordinates. */
export type BBoxPromptBase = {
  /** X Max Coordinate of the prompt (0-1). */
  x_max?: number;
  /** X Min Coordinate of the box (0-1). */
  x_min?: number;
  /** Y Max Coordinate of the prompt (0-1). */
  y_max?: number;
  /** Y Min Coordinate of the box (0-1). */
  y_min?: number;
};

/** Input parameters for video background removal. */
export type BenV2VideoInput = {
  /**
   * Optional RGB values (0-255) for the background color. If not provided, the
   * background will be transparent. For ex: [0, 0, 0]
   *
   * The element type was missing (a bare `Array` does not compile); `number`
   * follows from the RGB description above.
   */
  background_color?: Array<number>;
  /**
   * Output video format. Use "webm" for true transparency support (VP9 codec
   * with alpha channel). MP4 format does not support transparency and will
   * render transparent areas as black. Default value: `"mp4"`
   */
  output_format?: "mp4" | "webm";
  /** Random seed for reproducible generation. */
  seed?: number;
  /** URL of video to be used for background removal. */
  video_url: string | Blob | File;
};

/** Input parameters for batched background removal. */
export type BGRemoveBatchedInput = {
  /** List of image URLs to be processed (maximum 32 images). */
  images_data_url: string | Blob | File;
};

/** Input parameters for background removal on a single image. */
export type BGRemoveInput = {
  /** Input Image to erase from. */
  image_url: string | Blob | File;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Input parameters for background replacement. */
export type BGReplaceInput = {
  /** Whether to use the fast model. Default value: `true` */
  fast?: boolean;
  /** Input Image to erase from. */
  image_url: string | Blob | File;
  /** The negative prompt you would like to use to generate images. Default value: `""` */
  negative_prompt?: string;
  /** Number of Images to generate. Default value: `1` */
  num_images?: number;
  /** The prompt you would like to use to generate images. */
  prompt?: string;
  /**
   * The URL of the reference image to be used for generating the new background. Use "" to leave empty.
Either ref_image_url or bg_prompt has to be provided but not both. If both ref_image_url and ref_image_file are provided, ref_image_url will be used. Accepted formats are jpeg, jpg, png, webp. Default value: `""`
   */
  ref_image_url?: string | Blob | File;
  /** Whether to refine prompt. Default value: `true` */
  refine_prompt?: boolean;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Input parameters for BiRefNet background removal. */
export type birefnetInput = {
  /** URL of the image to remove background from. */
  image_url: string | Blob | File;
  /**
   * Model to use for background removal.
   * The 'General Use (Light)' model is the original model used in the BiRefNet repository.
   * The 'General Use (Heavy)' model is a slower but more accurate model.
   * The 'Portrait' model is a model trained specifically for portrait images.
   * The 'General Use (Light)' model is recommended for most use cases.
   *
   * The corresponding models are as follows:
   * - 'General Use (Light)': BiRefNet-DIS_ep580.pth
   * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth
   * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth
   *
   * Default value: `"General Use (Light)"`
   */
  model?: "General Use (Light)" | "General Use (Heavy)" | "Portrait";
  /**
   * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images.
Default value: `"1024x1024"`
   */
  operating_resolution?: "1024x1024" | "2048x2048";
  /** The format of the output image. Default value: `"png"` */
  output_format?: "webp" | "png" | "gif";
  /** Whether to output the mask used to remove the background. */
  output_mask?: boolean;
  /** Whether to refine the foreground using the estimated mask. Default value: `true` */
  refine_foreground?: boolean;
  /**
   * When `True`, the media is returned as a data URI and the output data won't
   * be available in the request history.
   */
  sync_mode?: boolean;
};

/** Result of BiRefNet background removal. */
export type birefnetOutput = {
  /** Image with background removed. */
  image: ImageFile;
  /** Mask used to remove the background. */
  mask_image?: ImageFile;
};

/** Input parameters for BiRefNet v2 background removal. */
export type BirefnetV2Input = {
  /** URL of the image to remove background from. */
  image_url: string | Blob | File;
  /**
   * Whether to return only the segmentation mask without applying it to the
   * image. When set to `True`, only the mask will be returned and foreground
   * refinement will be skipped. Useful for reducing computation and data
   * transfer when only the mask is needed.
   */
  mask_only?: boolean;
  /**
   * Model to use for background removal.
   * The 'General Use (Light)' model is the original model used in the BiRefNet repository.
   * The 'General Use (Light 2K)' model is the original model used in the BiRefNet repository but trained with 2K images.
   * The 'General Use (Heavy)' model is a slower but more accurate model.
   * The 'Matting' model is a model trained specifically for matting images.
   * The 'Portrait' model is a model trained specifically for portrait images.
   * The 'General Use (Dynamic)' model supports dynamic resolutions from 256x256 to 2304x2304.
   * The 'General Use (Light)' model is recommended for most use cases.
*
   * The corresponding models are as follows:
   * - 'General Use (Light)': BiRefNet
   * - 'General Use (Light 2K)': BiRefNet_lite-2K
   * - 'General Use (Heavy)': BiRefNet_lite
   * - 'Matting': BiRefNet-matting
   * - 'Portrait': BiRefNet-portrait
   * - 'General Use (Dynamic)': BiRefNet_dynamic
   *
   * Default value: `"General Use (Light)"`
   */
  model?:
    | "General Use (Light)"
    | "General Use (Light 2K)"
    | "General Use (Heavy)"
    | "Matting"
    | "Portrait"
    | "General Use (Dynamic)";
  /**
   * Resolution to operate on. A higher resolution gives more accurate output
   * for high-resolution input images. The '2304x2304' option is only
   * available for the 'General Use (Dynamic)' model.
   *
   * Default value: `"1024x1024"`
   */
  operating_resolution?: "1024x1024" | "2048x2048" | "2304x2304";
  /**
   * Format of the output image.
   *
   * Default value: `"png"`
   */
  output_format?: "webp" | "png" | "gif";
  /**
   * Whether to also output the mask used to remove the background.
   */
  output_mask?: boolean;
  /**
   * Whether to refine the foreground using the estimated mask.
   *
   * Default value: `true`
   */
  refine_foreground?: boolean;
  /**
   * When `True`, the media is returned as a data URI and the output data is
   * not available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Input options for BiRefNet v2 background removal on a video.
 */
export type BirefnetV2VideoInput = {
  /**
   * Which model performs the background removal.
   *
   * - 'General Use (Light)': the original model used in the BiRefNet
   *   repository; recommended for most use cases.
   * - 'General Use (Light 2K)': the original model used in the BiRefNet
   *   repository but trained with 2K images.
   * - 'General Use (Heavy)': a slower but more accurate model.
   * - 'Matting': a model trained specifically for matting images.
   * - 'Portrait': a model trained specifically for portrait images.
   * - 'General Use (Dynamic)': supports dynamic resolutions from 256x256 to
   *   2304x2304.
   *
   * The corresponding models are as follows:
   * - 'General Use (Light)': BiRefNet
   * - 'General Use (Light 2K)': BiRefNet_lite-2K
   * - 'General Use (Heavy)': BiRefNet_lite
   * - 'Matting': BiRefNet-matting
   * - 'Portrait': BiRefNet-portrait
   * - 'General Use (Dynamic)': BiRefNet_dynamic
   *
   * Default value: `"General Use (Light)"`
   */
  model?:
    | "General Use (Light)"
    | "General Use (Light 2K)"
    | "General Use (Heavy)"
    | "Matting"
    | "Portrait"
    | "General Use (Dynamic)";
  /**
   * Resolution to operate on. A higher resolution gives more accurate output
   * for high-resolution input images. The '2304x2304' option is only
   * available for the 'General Use (Dynamic)' model.
   *
   * Default value: `"1024x1024"`
   */
  operating_resolution?: "1024x1024" | "2048x2048" | "2304x2304";
  /**
   * Whether to also output the mask used to remove the background.
   */
  output_mask?: boolean;
  /**
   * Whether to refine the foreground using the estimated mask.
   *
   * Default value: `true`
   */
  refine_foreground?: boolean;
  /**
   * When `True`, the media is returned as a data URI and the output data is
   * not available in the request history.
   */
  sync_mode?: boolean;
  /**
   * Output type of the generated video.
   *
   * Default value: `"X264 (.mp4)"`
   */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /**
   * Quality of the generated video.
   *
   * Default value: `"high"`
   */
  video_quality?: "low" | "medium" | "high" | "maximum";
  /**
   * URL of the video whose background should be removed.
   */
  video_url: string | Blob | File;
  /**
   * Write mode of the generated video.
   *
   * Default value: `"balanced"`
   */
  video_write_mode?: "fast" | "balanced" | "small";
};

/**
 * Result of BiRefNet v2 background removal on a video.
 */
export type BirefnetV2VideoOutput = {
  /**
   * The mask that was used to remove the background.
   */
  mask_video?: VideoFile;
  /**
   * The video with its background removed.
   */
  video: VideoFile;
};

/**
 * Input options for bitdance image generation.
 */
export type bitdanceInput = {
  /**
   * If set to true, the safety checker will be enabled.
   *
   * Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Classifier-free guidance scale.
Higher values follow the prompt more closely.
   * Default value: `7.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. Will be snapped to the nearest
   * supported resolution.
   *
   * Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * Number of images to generate.
   *
   * Default value: `1`
   */
  num_images?: number;
  /**
   * Number of diffusion sampling steps per decoding step. Higher values
   * (e.g. 50) improve quality at the cost of speed.
   *
   * Default value: `25`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image.
   *
   * Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * Text prompt for image generation.
   */
  prompt: string;
  /**
   * Random seed for reproducibility. The same seed and prompt will produce
   * the same image.
   */
  seed?: number;
  /**
   * If true, the media will be returned as a data URI.
   */
  sync_mode?: boolean;
};

/**
 * Result of a bitdance image generation.
 */
export type bitdanceOutput = {
  /**
   * Whether the generated images contain NSFW concepts.
   */
  has_nsfw_concepts: Array<boolean>;
  /**
   * The generated image files info.
   *
   * NOTE(review): the element type was missing here (a bare `Array` does not
   * compile); `unknown` is a type-safe placeholder. Replace it with the
   * project's image file type once confirmed against the API schema.
   */
  images: Array<unknown>;
  /**
   * The prompt used for generating the image.
   */
  prompt: string;
  /**
   * Seed of the generated image.
   */
  seed: number;
};

/**
 * Input for blending elements into an image according to an instruction.
 */
export type BlendingInput = {
  /**
   * The source image.
   */
  image_url: string | Blob | File;
  /**
   * Instruct what elements you would like to blend in your image.
*/
  instruction: string;
};

/**
 * Input for blending a top-layer video over a bottom-layer video.
 */
export type BlendVideoInput = {
  /**
   * Blend mode to use for combining the videos.
   *
   * Default value: `"overlay"`
   */
  blend_mode?:
    | "addition"
    | "average"
    | "burn"
    | "darken"
    | "difference"
    | "divide"
    | "dodge"
    | "exclusion"
    | "grainextract"
    | "grainmerge"
    | "hardlight"
    | "lighten"
    | "multiply"
    | "negation"
    | "normal"
    | "overlay"
    | "phoenix"
    | "pinlight"
    | "reflect"
    | "screen"
    | "softlight"
    | "subtract"
    | "vividlight";
  /**
   * URL of the bottom layer video.
   */
  bottom_video_url: string | Blob | File;
  /**
   * Opacity of the top layer (0.0-1.0).
   *
   * Default value: `1`
   */
  opacity?: number;
  /**
   * End output when the shortest input ends.
   *
   * Default value: `true`
   */
  shortest?: boolean;
  /**
   * URL of the top layer video.
   */
  top_video_url: string | Blob | File;
};

/**
 * Input for applying a blur to an image.
 */
export type BlurInput = {
  /**
   * Blur radius.
   *
   * Default value: `3`
   */
  blur_radius?: number;
  /**
   * Sigma for Gaussian blur.
   *
   * Default value: `1`
   */
  blur_sigma?: number;
  /**
   * Type of blur to apply.
   *
   * Default value: `"gaussian"`
   */
  blur_type?: "gaussian" | "kuwahara";
  /**
   * URL of image to process.
   */
  image_url: string | Blob | File;
};

/**
 * Result of the blur operation.
 */
export type BlurOutput = {
  /**
   * The processed images with blur effect.
   *
   * NOTE(review): the element type was missing here (a bare `Array` does not
   * compile); `unknown` is a type-safe placeholder. Replace it with the
   * project's image type once confirmed against the API schema.
   */
  images: Array<unknown>;
};

/**
 * A labelled rectangle described by its top-left corner, width and height.
 */
export type BoundingBox = {
  /**
   * Height of the bounding box.
   */
  h: number;
  /**
   * Label of the bounding box.
   */
  label: string;
  /**
   * Width of the bounding box.
   */
  w: number;
  /**
   * X-coordinate of the top-left corner.
   */
  x: number;
  /**
   * Y-coordinate of the top-left corner.
   */
  y: number;
};

/**
 * A list of bounding boxes.
 */
export type BoundingBoxes = {
  /**
   * List of bounding boxes.
   */
  bboxes: Array<BoundingBox>;
};

/**
 * A box prompt that can be tied to a frame index.
 */
export type BoxPrompt = {
  /**
   * The frame index to interact with.
   */
  frame_index?: number;
  /**
   * Optional object identifier. Boxes sharing an object id refine the same object.
*/
  object_id?: number;
  /**
   * X Max Coordinate of the box
   */
  x_max?: number;
  /**
   * X Min Coordinate of the box
   */
  x_min?: number;
  /**
   * Y Max Coordinate of the box
   */
  y_max?: number;
  /**
   * Y Min Coordinate of the box
   */
  y_min?: number;
};

/**
 * A box prompt given by its min/max coordinates and an optional object id.
 */
export type BoxPromptBase = {
  /**
   * Optional object identifier. Boxes sharing an object id refine the same
   * object.
   */
  object_id?: number;
  /**
   * X Max Coordinate of the box
   */
  x_max?: number;
  /**
   * X Min Coordinate of the box
   */
  x_min?: number;
  /**
   * Y Max Coordinate of the box
   */
  y_max?: number;
  /**
   * Y Min Coordinate of the box
   */
  y_min?: number;
};

/**
 * Input for Bria image expansion.
 */
export type BriaExpandInput = {
  /**
   * The desired aspect ratio of the final image. Will be used over
   * original_image_size and original_image_location if provided.
   */
  aspect_ratio?:
    | "1:1"
    | "2:3"
    | "3:2"
    | "3:4"
    | "4:3"
    | "4:5"
    | "5:4"
    | "9:16"
    | "16:9";
  /**
   * The desired size of the final image, after the expansion. Should have an
   * area of less than 5000x5000 pixels.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not
   * compile); `number` is assumed because this is a pixel size — presumably
   * [width, height]; confirm against the API schema.
   */
  canvas_size: Array<number>;
  /**
   * The URL of the input image.
   */
  image_url: string | Blob | File;
  /**
   * The negative prompt you would like to use to generate images.
   *
   * Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The desired location of the original image, inside the full canvas.
   * Provide the location of the upper left corner of the original image. The
   * location can also be outside the canvas (the original image will be
   * cropped). Will be ignored if aspect_ratio is provided.
   *
   * NOTE(review): element type assumed to be `number` (coordinates of the
   * upper left corner); confirm against the API schema.
   */
  original_image_location?: Array<number>;
  /**
   * The desired size of the original image, inside the full canvas. Ensure
   * that the ratio of input image foreground or main subject to the canvas
   * area is greater than 15% to achieve optimal results. Will be ignored if
   * aspect_ratio is provided.
   *
   * NOTE(review): element type assumed to be `number` (a pixel size);
   * confirm against the API schema.
   */
  original_image_size?: Array<number>;
  /**
   * Text on which you wish to base the image expansion. This parameter is optional. Bria currently supports prompts in English only, excluding special characters.
Default value: `""`
   */
  prompt?: string;
  /**
   * You can choose whether you want your generated expansion to be random or
   * predictable. You can recreate the same result in the future by using the
   * seed value of a result from the response. You can exclude this parameter
   * if you are not interested in recreating your results. This parameter is
   * optional.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data
   * won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Structured description of the image to be generated.
 */
export type BriaFiboVlmStructuredprompt = {
  /**
   * The aesthetics of the image to be generated.
   */
  aesthetics?: Aesthetics;
  /**
   * The artistic style of the image to be generated.
   */
  artistic_style?: string;
  /**
   * The background setting of the image to be generated.
   */
  background_setting?: string;
  /**
   * The context of the image to be generated.
   */
  context?: string;
  /**
   * The lighting of the image to be generated.
   */
  lighting?: Lighting;
  /**
   * A list of objects in the image to be generated, along with their
   * attributes and relationships to other objects in the image.
   *
   * NOTE(review): the element type was missing here (a bare `Array` does not
   * compile); `unknown` is a type-safe placeholder. Replace it with the
   * project's object-description type once confirmed against the API schema.
   */
  objects?: Array<unknown>;
  /**
   * The photographic characteristics of the image to be generated.
   */
  photographic_characteristics?: PhotographicCharacteristics;
  /**
   * A short description of the image to be generated.
   */
  short_description?: string;
  /**
   * The style medium of the image to be generated.
   */
  style_medium?: string;
  /**
   * A list of text to be rendered in the image.
   *
   * NOTE(review): the element type was missing here (a bare `Array` does not
   * compile); `unknown` is a type-safe placeholder. Replace it with the
   * project's text-render type once confirmed against the API schema.
   */
  text_render?: Array<unknown>;
};

/**
 * Input for Bria text-to-image (HD) generation.
 */
export type BriaTextToImageHdInput = {
  /**
   * The aspect ratio of the image. When a guidance method is being used, the
   * aspect ratio is defined by the guidance image and this parameter is
   * ignored.
   *
   * Default value: `"1:1"`
   */
  aspect_ratio?:
    | "1:1"
    | "2:3"
    | "3:2"
    | "3:4"
    | "4:3"
    | "4:5"
    | "5:4"
    | "9:16"
    | "16:9";
  /**
   * Guidance images to use for the generation. Up to 4 guidance methods can be combined during a single inference.
*
   * NOTE(review): the element type was missing here (a bare `Array` does not
   * compile); `unknown` is a type-safe placeholder. Replace it with the
   * project's guidance-input type once confirmed against the API schema.
   */
  guidance?: Array<unknown>;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you
   * want the model to stick to your prompt when looking for a related image
   * to show you.
   *
   * Default value: `5`
   */
  guidance_scale?: number;
  /**
   * Which medium should be included in your generated images. This parameter
   * is optional.
   */
  medium?: "photography" | "art";
  /**
   * The negative prompt you would like to use to generate images.
   *
   * Default value: `""`
   */
  negative_prompt?: string;
  /**
   * How many images you would like to generate. When using any Guidance
   * Method, Value is set to 1.
   *
   * Default value: `4`
   */
  num_images?: number;
  /**
   * The number of iterations the model goes through to refine the generated
   * image. This parameter is optional.
   *
   * Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * The prompt you would like to use to generate images.
   */
  prompt: string;
  /**
   * When set to true, enhances the provided prompt by generating additional,
   * more descriptive variations, resulting in more diverse and creative
   * output images.
   */
  prompt_enhancement?: boolean;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data
   * won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Input for upscaling a video.
 */
export type BytedanceUpscalerUpscaleVideoInput = {
  /**
   * The enhancement preset optimized for specific video scenarios.
   * 'general' is a general-purpose template, 'ugc' targets user-generated
   * short videos, 'short_series' is for short dramas, 'aigc' is for
   * AI-generated content, and 'old_film' is for classic film restoration.
   *
   * Default value: `"general"`
   */
  enhancement_preset?: "general" | "ugc" | "short_series" | "aigc" | "old_film";
  /**
   * The enhancement quality tier. 'fast' provides essential upscaling with
   * good speed, 'standard' uses adaptive algorithms for better visual
   * texture, and 'pro' uses large-model restoration for cinematic quality
   * (longer processing time) at 10 times the cost of `standard` and `fast`.
   *
   * Default value: `"standard"`
   */
  enhancement_tier?: "fast" | "standard" | "pro";
  /**
   * The enhancement intensity. 'high' applies mild enhancement while keeping
   * visual texture close to the source video. 'medium' provides a balanced
   * image quality enhancement.
   *
   * Default value: `"high"`
   */
  fidelity?: "high" | "medium";
  /**
   * The scaling ratio for the output video resolution. When set, overrides
   * target_resolution and scales the input resolution by this factor (e.g.,
   * 2.0 doubles the resolution). Range: 1.1 to 10.0. Please note that this is
   * valid only up to 4k resolution, and trying to scale beyond 4k will result
   * in an error. (4k is defined as having a total pixel count of 3840x2160).
   */
  scale_ratio?: number;
  /**
   * The target FPS of the video to upscale.
   *
   * Default value: `"30fps"`
   */
  target_fps?: "30fps" | "60fps";
  /**
   * The target resolution of the video to upscale.
   *
   * Default value: `"1080p"`
   */
  target_resolution?: "1080p" | "2k" | "4k";
  /**
   * The URL of the video to upscale.
   */
  video_url: string | Blob | File;
};

/**
 * A single camera movement: its type and its value.
 */
export type CameraControl = {
  /**
   * The type of camera movement.
   */
  movement_type: "horizontal" | "vertical" | "pan" | "tilt" | "roll" | "zoom";
  /**
   * The value of the camera movement.
   */
  movement_value: number;
};

/**
 * Input for edge detection controlled by high/low hysteresis thresholds.
 */
export type CannyInput = {
  /**
   * High threshold for the hysteresis procedure. Edges with a strength
   * higher than the high threshold will always appear as edges in the output
   * image.
   *
   * Default value: `200`
   */
  high_threshold?: number;
  /**
   * URL of the image to process.
   */
  image_url: string | Blob | File;
  /**
   * Low threshold for the hysteresis procedure. Edges with a strength higher than the low threshold will appear in the output image, if there are strong edges nearby.
Default value: `100`
   */
  low_threshold?: number;
};

/**
 * Input for applying a Pixar-style effect to an image.
 */
export type cartoonifyInput = {
  /**
   * Whether to enable the safety checker.
   *
   * Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Guidance scale for the generation.
   *
   * Default value: `3.5`
   */
  guidance_scale?: number;
  /**
   * URL of the image to apply Pixar style to.
   */
  image_url: string | Blob | File;
  /**
   * Number of inference steps.
   *
   * Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * Scale factor for the Pixar effect.
   *
   * Default value: `1`
   */
  scale?: number;
  /**
   * The seed for image generation. The same seed with the same parameters
   * will generate the same image.
   */
  seed?: number;
  /**
   * Whether to use CFG zero.
   */
  use_cfg_zero?: boolean;
};

/**
 * Input for trying a garment on a human image.
 */
export type CatVtonInput = {
  /**
   * Type of the cloth to be tried on.
   *
   * Options:
   * - upper: Upper body cloth
   * - lower: Lower body cloth
   * - overall: Full body cloth
   * - inner: Inner cloth, like T-shirt inside a jacket
   * - outer: Outer cloth, like a jacket over a T-shirt
   */
  cloth_type: "upper" | "lower" | "overall" | "inner" | "outer";
  /**
   * URL of the garment image.
   */
  garment_image_url: string | Blob | File;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you
   * want the model to stick to your prompt when looking for a related image
   * to show you.
   *
   * Default value: `2.5`
   */
  guidance_scale?: number;
  /**
   * URL of the human image.
   */
  human_image_url: string | Blob | File;
  /**
   * The size of the generated image.
   *
   * Default value: `portrait_4_3`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The number of inference steps to perform.
   *
   * Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * The same seed and the same input given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
};

/**
 * Input for upscaling an image with CCSR.
 */
export type ccsrInput = {
  /**
   * Type of color correction for samples.
Default value: `"adain"`
   */
  color_fix_type?: "none" | "wavelet" | "adain";
  /**
   * The URL or data URI of the image to upscale.
   */
  image_url: string | Blob | File;
  /**
   * Scale of the output image. The higher the scale, the bigger the output
   * image will be.
   *
   * Default value: `2`
   */
  scale?: number;
  /**
   * Seed for reproducibility. Different seeds will make slightly different
   * results.
   */
  seed?: number;
  /**
   * Number of steps to run the model for. The higher the number, the better
   * the quality and the longer it will take to generate.
   *
   * Default value: `50`
   */
  steps?: number;
  /**
   * The ending point of uniform sampling strategy.
   *
   * Default value: `0.6667`
   */
  t_max?: number;
  /**
   * The starting point of uniform sampling strategy.
   *
   * Default value: `0.3333`
   */
  t_min?: number;
  /**
   * If specified, a patch-based sampling strategy will be used for sampling.
   *
   * Default value: `"none"`
   */
  tile_diffusion?: "none" | "mix" | "gaussian";
  /**
   * Size of patch.
   *
   * Default value: `1024`
   */
  tile_diffusion_size?: number;
  /**
   * Stride of sliding patch.
   *
   * Default value: `512`
   */
  tile_diffusion_stride?: number;
  /**
   * If specified, a patch-based sampling strategy will be used for VAE
   * decoding.
   */
  tile_vae?: boolean;
  /**
   * Size of VAE patch.
   *
   * Default value: `226`
   */
  tile_vae_decoder_size?: number;
  /**
   * Size of latent image.
   *
   * Default value: `1024`
   */
  tile_vae_encoder_size?: number;
};

/**
 * Result of a CCSR upscaling run: the generated image and the seed used.
 */
export type ccsrOutput = {
  /**
   * The generated image file info.
   */
  image: Image;
  /**
   * The seed used for the generation.
   */
  seed: number;
};

/**
 * Input for zooming into an image around a chosen center.
 */
export type ChainOfZoomInput = {
  /**
   * X coordinate of zoom center (0-1).
   *
   * Default value: `0.5`
   */
  center_x?: number;
  /**
   * Y coordinate of zoom center (0-1).
   *
   * Default value: `0.5`
   */
  center_y?: number;
  /**
   * Input image to zoom into.
   */
  image_url: string | Blob | File;
  /**
   * Zoom scale in powers of 2.
   *
   * Default value: `5`
   */
  scale?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
  /**
   * Additional prompt text to guide the zoom enhancement.
   *
   * Default value: `""`
   */
  user_prompt?: string;
};

/**
 * Result of a chain-of-zoom run.
 */
export type ChainOfZoomOutput = {
  /**
   * List of intermediate images.
   *
   * NOTE(review): the element type was missing here (a bare `Array` does not
   * compile); `unknown` is a type-safe placeholder. Replace it with the
   * project's image type once confirmed against the API schema.
   */
  images: Array<unknown>;
  /**
   * Actual linear zoom scale applied.
   */
  scale: number;
  /**
   * Center coordinates used for zoom.
   *
   * NOTE(review): element type assumed to be `number`, matching the numeric
   * `center_x` / `center_y` inputs; confirm against the API schema.
   */
  zoom_center: Array<number>;
};

export type Character = { /** * ID of the avatar to use in the video */ avatar: "Abigail (Upper Body)" | "Abigail Office Front" | "Abigail Office Side" | "Abigail Sofa Front" | "Abigail Sofa Side" | "Aditya in Brown blazer" | "Aditya in Blue blazer" | "Aditya in Blue shirt" | "Aditya in Blue t-shirt" | "Aditya in Beige blazer" | "Adrian in Blue Shirt" | "Adrian in Blue Suit" | "Adrian in Blue Sweater" | "Adriana BizTalk Front" | "Adriana BizTalk Side" | "Adriana Business Front 2" | "Adriana Business Front " | "Adriana Business Side" | "Adriana Nurse Front 2" | "Adriana Nurse Front" | "Adriana Nurse Side 2" | "Adriana Nurse Side" | "Adriana Nurse Sitting Side" | "Adriana SuitSofa Front" | "Aiko" | "Albert in Blue Polo shirt" | "Albert in Blue blazer" | "Albert in Blue shirt" | "Albert in Blue suit" | "Albert in Khaki blazer" | "Albert in White shirt" | "Amanda in Blue Shirt (Front)" | "Amanda in Blue Shirt (Left)" | "Amanda in Blue Shirt (Right)" | "Amanda in Grey Shirt (Front)" | "Amanda in Grey Shirt (Left)" | "Amanda in Grey Shirt (Right)" | "Amanda in Maintenance (Front)" | "Amanda in Maintenance (Left)" | "Amanda in Maintenance (Right)" | "Amelia Business Training Front 2" | "Amelia Business Training Front" | "Amelia Business Training Side 2" | "Amelia Business Training Side" | "Amelia Lounge Front 2" | "Amelia Lounge Front" | "Amelia Lounge Side 2" | "Amelia Lounge Side" | "Amelia Yoga Front 2" | "Amelia Yoga Front" | "Amelia Yoga Side 2" | "Amelia Yoga Side" | "Alex in Black Suit" | "Alex in Jacket" | "Alex in White Coat" | "Alex in Yellow Sweater" | "Anja Office Front" | "Anja Office Side " | "Anja Sofa Front" | "Anja Sofa
Side " | "Ann Business Front" | "Ann Business Sitting" | "Ann Casual Front" | "Ann Casual Lying" | "Ann Casual Sitting 2" | "Ann Casual Sitting" | "Ann Doctor Sitting" | "Ann Doctor Standing" | "Ann Sweater Front" | "Ann Sweater Side" | "Ann Therapist" | "Anna in White T-shirt" | "Anna in Turtleneck T-shirt" | "Anna in Brown T-shirt" | "Annelise in Dark blue dress" | "Annelise in Off white dress" | "Annelise in Sky blue dress" | "Annelise in White dress" | "Annelore in Blue blazer" | "Annelore in Blue dress" | "Annelore in Red blazer" | "Annelore in Red sweater" | "Annelore in White shirt" | "Annie in Grey Jacket" | "Annie in White Shirt" | "Annie in Tan Jacket" | "Annie in Blue Casual" | "Annie in Light Blue" | "Annie in Brown Shirt" | "Annie in Black Shirt" | "Annie in Pink Suit" | "Annie in Grey Dress" | "Annie in Blue Vest" | "Annie in Black V-neck Shirt" | "Annie in Blue Suit" | "Annie Bar Sitting Front" | "Annie Bar Sitting Side" | "Annie Bar Standing Front 2" | "Annie Bar Standing Front 3" | "Annie Bar Standing Front" | "Annie Bar Standing Side 2" | "Annie Bar Standing Side 3" | "Annie Bar Standing Side" | "Annie Business Casual Standing Front 2" | "Annie Business Casual Standing Front" | "Annie Business Casual Standing Side 2" | "Annie Business Casual Standing Side" | "Annie Casual Sitting Front 2" | "Annie Casual Sitting Front" | "Annie Casual Sitting Side 2" | "Annie Casual Sitting Side" | "Annie Casual Standing Front 2" | "Annie Casual Standing Front" | "Annie Casual Standing Side 2" | "Annie Casual Standing Side" | "Annie Desk Sitting Front 2" | "Annie Desk Sitting Front" | "Annie Desk Sitting Side 2" | "Annie Desk Sitting Side" | "Annie Lounge Standing Front" | "Annie Lounge Standing Side" | "Annie Office Sitting Front 2" | "Annie Office Sitting Front" | "Annie Office Sitting Side 2" | "Annie Office Sitting Side" | "Annie Office Standing Front" | "Annie Office Standing Side" | "Annie Sofa Sitting Front 3" | "Annie Sofa Sitting Front" | "Annie Sofa 
Sitting Side 2" | "Annie Sofa Sitting Side 3" | "Annie Sofa Sitting Side" | "Annie Sofa Sitting Front 2" | "Annie Studio Pink Sitting Front" | "Annie Studio Pink Sitting Side" | "Annie Studio Pink Standing Front" | "Annie Studio Pink Standing Side" | "Armando Casual Front" | "Armando Casual Side" | "Armando Suit Front" | "Armando Suit Side" | "Armando Sweater Front 2" | "Armando Sweater Front" | "Armando Sweater Side 2" | "Armando Sweater Side" | "Artur Office Front 2" | "Artur Office Front" | "Artur Office Side 2" | "Artur Office Side" | "Artur Sofa Casual Front 2" | "Artur Sofa Casual Front" | "Artur Sofa Casual Side 2" | "Artur Sofa Causal Side" | "Aubrey Sofa Side" | "Aubrey (Upper Body)" | "Aubrey Night Scene Front" | "Aubrey Outdoor Sport Front" | "Aubrey Outdoor Sport Side" | "Aubrey Sofa Front" | "August Casual Front 2" | "August Casual Front" | "August Cool Sitting" | "August Cool Style" | "August Employee Front" | "August Hoodies Front 2" | "August Hoodies Front" | "Bahar (Upper Body)" | "Bahar Business Front" | "Bahar Business Side" | "Bahar Business Sitting Front" | "Bahar Business Sitting Side" | "Bahar Casual Sitting Front 2" | "Bahar Casual Sitting Front" | "Bahar Casual Sitting Side 2" | "Bahar Casual Sitting Side" | "Bahar Denim Casual Front" | "Bahar Denim Casual Side" | "Bahar Denim Front" | "Bahar Denim Side" | "Bahar Jacket Casual Front" | "Bahar Jacket Casual Side" | "Bahar Jacket Front" | "Bahar Jacket Side" | "Bahar Suit Front" | "Bahar Suit Side" | "Bastien in Blue T-shirt" | "Bastien in Blue blazer" | "Bastien in Blue shirt" | "Bastien in White shirt" | "Blanka Lounge Front" | "Blanka Lounge Side" | "Blanka Outdoor Business Front" | "Blanka Outdoor Business Side" | "Blanka Outdoor Reading Front" | "Blanka Outdoor Reading Side" | "Blanka Picnic Front" | "Blanka Picnic Side" | "Bojan Business Training Front 2" | "Bojan Business Training Front" | "Bojan Business Training Side 2" | "Bojan Business Training Side" | "Bojan Lounge Front 2" | 
"Bojan Lounge Front" | "Bojan Lounge Side 2" | "Bojan Lounge Side" | "Bojan Sport Front 2" | "Bojan Sport Front" | "Bojan Sport Side 2" | "Bojan Sport Side" | "Bradley in Blue Polo (Front)" | "Bradley in Blue Polo (Left)" | "Bradley in Blue Polo (Right)" | "Bradley in Blue Shirt (Front)" | "Bradley in Blue Shirt (Left)" | "Bradley in Blue Shirt (Right)" | "Bradley in Doctor (Front)" | "Bradley in Doctor (Left)" | "Bradley in Doctor (Right)" | "Brandon in Grey Suit" | "Brandon in Blue Sweater" | "Brandon in White Shirt" | "Brandon Business Sitting Front" | "Brandon Business Sitting Side" | "Brandon Business Standing Front" | "Brandon Business Standing Side" | "Brandon Casual Sitting Front" | "Brandon Casual Sitting Side" | "Brandon Kitchen Standing Front" | "Brandon Kitchen Standing Side" | "Brandon Lobby Sitting Front" | "Brandon Lobby Sitting Side" | "Brandon Lobby Standing Front" | "Brandon Lobby Standing Side" | "Brandon Office Sitting Front" | "Brandon Office Sitting Side" | "Brandon Office Standing Front" | "Brandon Office Standing Side" | "Brandon Sofa Sitting Front" | "Brandon Sofa Sitting Side" | "Brent Office Front 2" | "Brent Office Front" | "Brent Office Side 2" | "Brent Office Side" | "Brent Sofa Front 2" | "Brent Sofa Front" | "Brent Sofa Side 2" | "Brent Sofa Side" | "Briana in White shirt" | "Briana in Striped T-shirt" | "Briana in Brown suit" | "Bruce" | "Bryan Casual Front" | "Bryan Casual Side" | "Bryan Fitness Coach" | "Bryan Tech Expert" | "Bryan Plaid Shirt Front" | "Bryan Plaid Shirt Side" | "Bryan Suit Front" | "Bryan Suit Side" | "Bryce in Black t-shirt" | "Bryce in Blue blazer" | "Bryce in Blue shirt" | "Bryce in Grey blazer" | "Bryce in White shirt" | "Byron Business Front 2" | "Byron Business Front" | "Byron Business Side 2" | "Byron Business Side" | "Byron Business Sitting Front" | "Byron Business Sitting Size" | "Byron Casual Front 2" | "Byron Casual Front" | "Byron Casual Side 2" | "Byron Casual Side" | "Byron Casual Sitting Front" | 
"Byron Casual Sitting Side" | "Byron Jacket Front" | "Byron Jacket Side" | "Byron Sitting Front" | "Byron Sitting Side" | "Byron Suit Front" | "Byron Suit Side" | "Candace in Beige Dress (Front)" | "Candace in Beige Dress (Left)" | "Candace in Beige Dress (Right)" | "Candace in Doctor (Front)" | "Candace in Doctor (Left)" | "Candace in Doctor (Right)" | "Candace in Pink Blazer (Front)" | "Candace in Pink Blazer (Right)" | "Candace in Pink Blazer (Left)" | "Carla in Doctor (Front)" | "Carla in Doctor (Left)" | "Carla in Doctor (Right)" | "Carla in Dress (Left)" | "Carla in Shirt (Front)" | "Carla in Shirt (Left)" | "Carla in Shirt (Right)" | "Carlotta BizTalk Front" | "Carlotta BizTalk Side" | "Carlotta Business Front" | "Carlotta Business Side" | "Carlotta Casual Front" | "Carlotta Casual Side" | "Carlotta Casual Sitting Front" | "Carlotta Casual Sitting Side" | "Carlotta Half Front" | "Carlotta Pink Jumpsuit Front 2" | "Carlotta Pink Jumpsuit Front" | "Carlotta Pink Jumpsuit Side 2" | "Carlotta Pink Jumpsuit Side" | "Carlotta Pink Jumpsuit Sitting Front" | "Carlotta Pink Jumpsuit Sitting Side" | "Caroline Business Sitting Front" | "Caroline Business Sitting Side" | "Caroline Business Standing Front" | "Caroline Business Standing Side" | "Caroline in Blue Suit" | "Caroline in White Shirt" | "Caroline in Yellow Casual" | "Caroline in Yellow Skirt" | "Caroline Casual Sitting Front" | "Caroline Casual Sitting Side" | "Caroline Kitchen Standing Front" | "Caroline Kitchen Standing Side" | "Caroline Lobby Sitting Front" | "Caroline Lobby Sitting Side" | "Caroline Lobby Standing Front" | "Caroline Lobby Standing Side" | "Caroline Office Sitting Front" | "Caroline Office Sitting Side" | "Caroline Office Standing Front" | "Caroline Office Standing Side" | "Caroline Sofa Sitting Front" | "Caroline Sofa Sitting Side" | "Chad in Blue Shirt (Front)" | "Chad in Blue Shirt (Left)" | "Chad in Blue Shirt (Right)" | "Chad in Grey Shirt (Front)" | "Chad in Grey Shirt (Left)" | "Chad 
in Grey Shirt (Right)" | "Chad in Maintenance (Front)" | "Chad in Maintenance (Left)" | "Chad in Maintenance (Right)" | "Chakir in Headscarf (Front)" | "Chakir in Headscarf (Left)" | "Chakir in Headscarf (Right)" | "Chakir in White Shirt (Front)" | "Chakir in White Shirt (Right)" | "Chakir in white Shirt (Left)" | "Chloe (Upper Body)" | "Chloe Lounge Front" | "Chloe Lounge Side" | "Chloe Outdoor Side" | "Colin Business Front 2" | "Colin Business Front" | "Colin Jacket Front" | "Colin Sitting Cool Style 2" | "Colin Sitting Cool Style" | "Colin Suit Front" | "Colin Sweater Front" | "Colin Sweater Sitting Front" | "Connie Business Front" | "Connie Business Sitting Front" | "Connie Casual Sitting Front" | "Connie Casual Sitting Side" | "Connie Education Front" | "Connie Education Side 2" | "Connie Education Side" | "Connie Skirt Front" | "Connie Skirt Side" | "Connie Skirt Sitting Front" | "Connie Skirt Sitting Side" | "Connie Suit Front" | "Conrad House Front" | "Conrad House Side" | "Conrad Sofa Front" | "Conrad Sofa Side" | "Crisanto Business Front" | "Crisanto Business Side" | "Crisanto Chef Front 2" | "Crisanto Chef Front" | "Crisanto Chef Side 2" | "Crisanto Chef Side" | "Crisanto Chef Sitting " | "Crisanto Education Front" | "Crisanto Education Side" | "Crisanto Nurse Front 2" | "Crisanto Nurse Front" | "Crisanto Nurse Side 2" | "Crisanto Nurse Side" | "Crisanto Nurse Sitting Front" | "Crisanto Nurse Sitting Side" | "Crisanto Suit Front" | "Crisanto Suit Side" | "Daphne in Blue blazer" | "Daphne in Blue shirt" | "Daphne in Grey blazer" | "Daphne in Grey suit" | "Daphne in Pink hoodie" | "Daphne in White t-shirt" | "Darnell in Blue Shirt (Front)" | "Darnell in Blue Shirt (Left)" | "Darnell in Blue Shirt (Right)" | "Darnell in Bordeaux Polo (Front)" | "Darnell in Bordeaux Polo (Left)" | "Darnell in Bordeaux Polo (Right)" | "Darnell in Doctor (Front)" | "Darnell in Doctor (Left)" | "Darnell in Doctor (Right)" | "Derya Indoor Front 2" | "Derya Indoor Front" | "Derya 
Indoor Side 2" | "Derya Indoor Side" | "Derya Office Front 2" | "Derya Office Front" | "Derya Office Side 2" | "Derya Office Side" | "Dexter Casual Front" | "Dexter Casuat Side" | "Dexter Doctor Sitting" | "Dexter Doctor Standing" | "Dexter Lawyer" | "Dexter Suit Front" | "Dexter Suit Side" | "Dexter Winter Coat Front" | "Dexter Winter Coat Side" | "Diana in Black Camisole Top" | "Diana in Striped Shirt" | "Diana in White Shirt" | "Diora in Blue blazer" | "Diora in Green blazer" | "Diora in Pink shirt" | "Diora in White shirt" | "Diora in White t-shirt" | "Diran Casual Front" | "Diran Casual Side" | "Diran Jacket Front" | "Diran Jacket Side" | "Diran Macbook Business Front" | "Diran Macbook Business Side" | "Diran Macbook Casual Front" | "Diran Macbook Casual Side" | "Diran Macbook Sitting Front 2" | "Diran Macbook Sitting Front" | "Diran Macbook Sitting Side 2" | "Diran Macbook Sitting Side" | "Diran Suit Front" | "Diran Suit Side" | "Diran iPad Front" | "Diran iPad Side" | "Diran iPad Sitting Front" | "Diran iPad Sitting Side" | "Elenora Casual Front" | "Elenora Casual Side" | "Elenora Fitness Coach" | "Elenora Fitness Coach 2" | "Elenora Tech Expert" | "Elenora Suit Front" | "Elenora Suit Side" | "Elenora YellowDress Front" | "Elenora YellowDress Side" | "Emanuel Office Front" | "Emanuel Office Side" | "Emanuel Sofa Front" | "Emanuel Sofa Side" | "Emery in Blue blazer" | "Emery in Blue suit" | "Emery in Green shirt" | "Emery in Khaki blazer" | "Emery in Red blazer" | "Emery in White t-shirt" | "Emilia Outdoor Business Front" | "Emilia Outdoor Business Side" | "Emilia Outdoor Yoga Front 2" | "Emilia Outdoor Yoga Front" | "Emilia Outdoor Yoga Side" | "Emilia Picnic Front" | "Emilia Picnic Side" | "Edward in Black Suit" | "Edward" | "Edward in Blue Shirt" | "Esmond in Black coat" | "Esmond in Black shirt" | "Esmond in Blue blazer" | "Esmond in Blue suit" | "Esmond in Grey sweater" | "Fernando Business Indoor Front" | "Fernando Business Indoor Side" | "Fernando 
OutdoorChair Front 2" | "Fernando Outdoor Chair Front" | "Fernando Outdoor Chair Side" | "Fernando Outdoor Front" | "Fernando Outdoor Side" | "Fernando Outdoor Table Front" | "Fernando Outdoor Table Side" | "Fina Business Sitting Front" | "Fina Business Sitting Side" | "Fina Casual Front 2" | "Fina Casual Front" | "Fina Casual Side 2" | "Fina Casual Side" | "Fina Casual Sitting Front" | "Fina Casual Sitting Side" | "Fina Denim Front" | "Fina Denim Side" | "Fina Denim Sitting Front" | "Fina Employee Front" | "Fina Employee Side" | "Fina Suit Front" | "Fina Suit Side" | "Fina Support Front" | "Fina Support Side" | "Florin Business Sitting Front" | "Florin Business Sitting Side" | "Florin Maintain Front 2" | "Florin Maintain Front" | "Florin Maintain Side 2" | "Florin Maintain Side" | "Florin Maintain Siiting Front" | "Florin Maintain Siiting Side" | "Florin Suit Front 2" | "Florin Suit Front" | "Florin Suit Side 2" | "Florin Suit Side" | "Francis in Blazer (Front)" | "Francis in Blazer (Left)" | "Francis in Blazer (Right)" | "Francis in Doctor (Front)" | "Francis in Doctor (Left)" | "Francis in Doctor (Right)" | "Francis in Shirt (Front)" | "Francis in Shirt (Left)" | "Francis in Shirt (Right)" | "Fred in Blue Long Shirt (Front)" | "Fred in Blue Long Shirt (Left)" | "Fred in Blue Long Shirt (Right)" | "Fred in Blue Short Shirt (Front)" | "Fred in Blue Short Shirt (Left)" | "Fred in Blue Short Shirt (Right)" | "Freja in Blue blazer" | "Freja in Grey blazer" | "Freja in White blazer" | "Freja in White polo shirt" | "Freja in White shirt" | "Gabriel in Black Sweatshirt" | "Gabriel in Blue Suit" | "Gabriel in Gray Shirt" | "Gala Bedroom Front" | "Gala Business Sofa Front 2" | "Gala Business Sofa Front 3" | "Gala Business Sofa Front" | "Gala Business Sofa Side 2" | "Gala Business Sofa Side 3" | "Gala Business Sofa Side" | "Gala Casual Sofa with iPad Front" | "Gala Casual Sofa with iPad Side 2" | "Gala Casual Sofa with iPad Side" | "Gala Office Front" | "Gala Office Side" 
| "Gala Sofa Front 2" | "Gala Sofa Front 3" | "Gala Sofa Front" | "Gala Sofa Side 2" | "Gala Sofa Side 3" | "Gala Sofa Side" | "Georgia (Upper Body)" | "Georgia Casual Front" | "Georgia Casual Side" | "Georgia Office Front" | "Georgia Office Side" | "Gerardo Sofa Side" | "Gerardo Indoor Front" | "Gerardo Indoor Side" | "Gerardo Night Scene Front 2" | "Gerardo Night Scene Front" | "Gerardo Outdoor Sport Front" | "Gerardo Outdoor Sport Side" | "Gerardo Sofa Front" | "Giulia Office Front 2" | "Giulia Office Front" | "Giulia Office Side 2" | "Giulia Office Side" | "Giulia Sofa Front" | "Giulia Sofa Side" | "Giulia Sofa Front 2" | "Giulia Sofa Side 3" | "Hada Casual Cup Front" | "Hada Casual Cup Side" | "Hada Casual Front" | "Hada Casual Side" | "Hada Casual Sitting Front 2" | "Hada Casual Sitting Front" | "Hada Casual Sitting Side 2" | "Hada Casual Sitting Side" | "Hada LivelyGestures Front" | "Hada LivelyGestures Side" | "Hada LivelyGestures Sitting Front" | "Hada LivelyGestures Sitting Side" | "Hada Suit Front 2" | "Hada Suit Front" | "Hada Suit Side 2" | "Hada Suit Side" | "Hada Suit Sitting Front 2" | "Hada Suit Sitting Front" | "Hada Suit Sitting Side" | "Harrison in Black Suit" | "Harrison in Gray Suit" | "Harrison in White Shirt" | "Ian in Beige Shirt" | "Ian in Black Jacket" | "Ian in Gray Suit" | "Ida Lounge Front 2" | "Ida Lounge Front" | "Ida Lounge Side 2" | "Ida Lounge Side" | "Ida Sofa Front 2" | "Ida Sofa Front" | "Ida Sofa Side 2" | "Ida Sofa Side" | "Iker in Almond sweater" | "Iker in Black blazer" | "Iker in Blue shirt" | "Iker in Grey blazer" | "Iker in White shirt" | "Imelda BizTalk Front" | "Imelda BizTalk Side" | "Imelda Business Front" | "Imelda Business Side" | "Imelda Business Sitting Front 2" | "Imelda Business Sitting Front" | "Imelda Business Sitting Side 2" | "Imelda Business Sitting Side" | "Imelda Casual Front" | "Imelda Casual Side" | "Imelda Coat Front 2" | "Imelda Coat Front" | "Imelda Coat Side" | "Imelda Customer Support Front" | 
"Imelda Customer Support Side" | "Imelda Full Side " | "Imelda Suit Front" | "Imelda Suit Side" | "Ivan in Black Suit" | "Ivan in Gary Suit" | "Ivan in Suit" | "Ivan in Sweater" | "Jason in Black Jacket" | "Jason in Blue Suit" | "Jason in Gray Shirt" | "Javi Intense Sitting Speaking Front" | "Javi Intense Sitting Speaking Side" | "Javi Intense Sitting Speaking " | "Javi Intense Speaking Front 2" | "Javi Intense Speaking Front 3" | "Javi Intense Speaking Front" | "Javi Intense Speaking Side 3" | "Javi Intense Speaking Side" | "Javi in Passionate Gestures 2" | "Javi in Passionate Gestures 3" | "Javi in Passionate Gestures" | "Jin (Upper Body)" | "Jin Blue Casual Front" | "Jin Blue Casual Side" | "Jin Business Café Mode Front" | "Jin Business Café Mode Side" | "Jin Business Sitting Front" | "Jin Business Sitting Side" | "Jin Casual Café Mode Front" | "Jin Casual Café Mode Side" | "Jin Casual Sitting Front" | "Jin Casual Sitting Side" | "Jin Suit Front" | "Jin Suit Side" | "Jin Vest Front" | "Jin Vest Side" | "Jin Vest Sitting Front" | "Jin Vest Sitting Side" | "Jin in Education Front" | "Jin in Education Side" | "Jinwoo in Black vest" | "Jinwoo in Blue suit" | "Jinwoo in White T-shirt" | "Jinwoo in White shirt" | "Jinwoo in White suit" | "Jocelyn Office Front 2" | "Jocelyn Office Front" | "Jocelyn Office Side 2" | "Jocelyn Office Side" | "Jocelyn Sofa Front 2" | "Jocelyn Sofa Front" | "Jocelyn Sofa Side 2" | "Jocelyn Sofa Side" | "Joel Couch Front" | "Joel Couch Side" | "Joel Gym Front" | "Joel Gym Side" | "Joel Mountain Front" | "Joel Mountain Side" | "Jonas (Upper Body)" | "Jonas Gym Front 2" | "Jonas Gym Front" | "Jonas Gym Side 2" | "Jonas Gym Side " | "June Office Front 2" | "Juan Office Front" | "Juan Office Side 2" | "Juan Office Side" | "Juan Sofa Front 2" | "Juan Sofa Front" | "Juan Sofa Side 2" | "Juan Sofa Side" | "Judita (Upper Body)" | "Judita Yoga Front 2" | "Judita Yoga Front" | "Judita Yoga Side 2" | "Judita Yoga Side" | "Judith (Upper Body)" | "Judith 
Business Front 2" | "Judith Business Front" | "Judith Business Sitting Front" | "Judith Casual Front" | "Judith Casual Sitting Front" | "Judith Suit Front" | "Judy Business Front" | "Judy Business Side" | "Judy Business Sitting Front" | "Judy Casual Front" | "Judy Casual Side" | "Judy Casual Sitting" | "Judy ConfidentSpeaking Front" | "Judy ConfidentSpeaking Side" | "Judy Doctor Sitting" | "Judy Doctor Standing" | "Judy Lawyer" | "Judy NurseSitting Front 2" | "Judy NurseSitting Front" | "Judy NurseSitting Side 2" | "Judy NurseSitting Side" | "Judy Nurse Front 2" | "Judy Nurse Front" | "Judy Nurse OnDuty Front" | "Judy Nurse OnDuty Side" | "Judy Nurse Side 2" | "Judy Nurse Side" | "Judy Suit Front" | "Judy Suit Side" | "Judy Teacher Sitting" | "Judy HR" | "Judy Teacher Standing" | "June (Upper Body)" | "June HR" | "June Office Front" | "June Office Side 2" | "June Office Side" | "June Sofa Casual Front 2" | "June Sofa Casual Front" | "June Sofa Casual Side 2" | "June Sofa Casual Side" | "Justin in Black Shirt" | "Justin in Black Suit" | "Justin in White Shirt" | "Justo Business Front 2" | "Justo Business Front" | "Justo Business Side 2" | "Justo Business Side" | "Justo Casual Sitting" | "Justo CustomerService Front 2" | "Justo CustomerService Front" | "Justo CustomerService Side" | "Justo CustomerService Sitting Front" | "Justo CustomerService Sitting Side" | "Justo EmployeeTraining Front" | "Justo Suit Casual Front " | "Justo Suit Casual Side " | "Karolin in Gray Suit" | "Karolin in Sweatshirt" | "Karolin in Black Suit" | "Kavya Indoor Front" | "Kavya Indoor Side" | "Kavya Outdoor Side" | "Kavya Outdoor Sport Front" | "Kavya Outdoor Sport Side" | "Kavya Sofa Front" | "Kavya Sofa Side" | "Kelly in Blue Shirt (Front)" | "Kelly in Blue Shirt (Left)" | "Kelly in Blue Shirt (Right)" | "Kelly in Doctor (Front)" | "Kelly in Doctor (Left)" | "Kelly in Doctor (Right)" | "Kelly in Pink Shirt (Front)" | "Kelly in Pink Shirt (Left)" | "Kelly in Pink Shirt (Right)" | "Klara in 
Black blazer" | "Klara in Blue blazer" | "Klara in Blue dress" | "Klara in Pink shirt" | "Klara in White blazer" | "Klara in White shirt" | "Kristin in Lace Dress" | "Kristin in V-neck Shirt" | "Kristin in Black Suit" | "Leos Office Front 2" | "Leos Office Front" | "Leos Office Side 2" | "Leos Office Side" | "Leos Sofa Front 2" | "Leos Sofa Front" | "Leos Sofa Side 2" | "Leos Sofa Side" | "Leszek Lounge Front" | "Leszek Lounge Side" | "Leszek Outdoor Business Front" | "Leszek Outdoor Business Side" | "Leszek Outdoor Casual Front" | "Leszek Outdoor Casual Side" | "Leszek Sofa Front" | "Leszek Sofa Side" | "Leah" | "Leah in Black Suit" | "Lina Casual Front 2" | "Lina Casual Front" | "Lina Casual Side 2" | "Lina Casual Side" | "Lina Casual Sitting Front" | "Lina Casual Sitting Side" | "Lina Dress Front" | "Lina Dress Side 2" | "Lina Dress Side" | "Lina Dress Sitting Front" | "Lina Dress Sitting Side" | "Lina Sweater Front 2" | "Lina Sweater Front" | "Lina Sweater Side 2" | "Lina Sweater Side" | "Lina Sweater Sitting Front 2" | "Lina Sweater Sitting Front" | "Lina Sweater Sitting Side 2" | "Lina Sweater Sitting Side" | "Lisa" | "Luca" | "Lucien in Black coat" | "Lucien in Black polo-shirt" | "Lucien in Blue blazer" | "Lucien in Blue shirt" | "Lucien in Grey blazer" | "Lucien in White shirt" | "Luke in Brown Suit" | "Luke in Yellow Jacket" | "Luke in Blue Suit" | "Marcus (Upper Body)" | "Marcus Café Front 2" | "Marcus Café Front" | "Marcus Café Side 2" | "Marcus Café Side" | "Marcus Casual Front" | "Marcus Casual Side" | "Marcus Casual Sitting Front 2" | "Marcus Casual Sitting Front" | "Marcus Casual Sitting Side 2" | "Marcus Casual Sitting Side" | "Marcus Denim Jacket Front" | "Marcus Denim Jacket Side" | "Marcus Education Front" | "Marcus Education Side" | "Marcus Sitting Front" | "Marcus Sitting Side" | "Marcus Suit Front" | "Marcus Suit Side" | "Maria in Suit" | "Maria in Sweater" | "Maria in Black Suit" | "Martina Office Front 2" | "Martina Office Front" | "Martina 
Office Side 2" | "Martina Office Side" | "Martina Sofa Front 2" | "Martina Sofa Front" | "Martina Sofa Side 2" | "Martina Sofa Side" | "Masha Office Front 2" | "Masha Office Front" | "Masha Office Side 2" | "Masha Office Side" | "Masha Sofa Casual Front 2" | "Masha Sofa Casual Front" | "Masha Sofa Casual Side 2" | "Masha Sofa Casual Side" | "Mason in Blue Suit" | "Mason in White Shirt" | "Mason in Blue Sweater" | "Matteo Office Front 2" | "Matteo Office Front" | "Matteo Office Side 2" | "Matteo Office Side" | "Matteo Sofa Front 2" | "Matteo Sofa Front" | "Matteo Sofa Side 2" | "Matteo Sofa Side" | "Max (Upper Body)" | "Max Indoor Front" | "Max Indoor Side" | "Max Outdoor Sport Front" | "Max Outdoor Sport Side" | "Milena Office Front 2" | "Milena Office Front" | "Milena Office Side 2" | "Milena Office Side" | "Milena Sofa Front 2" | "Milena Sofa Front" | "Milena Sofa Side 2" | "Milena Sofa Side" | "Miles (Upper Body)" | "Miles Outdoor Front" | "Miles Outdoor Side" | "Miles Sofa Front 2" | "Miles Sofa Front" | "Miles Sofa Side 2" | "Miles Sofa Side" | "Minho in Blue blazer" | "Minho in Blue shirt" | "Minho in Green polo-shirt" | "Minho in Khaki jacket" | "Minho in White shirt" | "Minho in White t-shirt" | "Mireia Business Indoor Front" | "Mireia Business Indoor Side" | "Mireia Outdoor Chair Front" | "Mireia Outdoor Front" | "Mireia Outdoor Side" | "Mireia Outdoor Table Front" | "Mireia Outdoor Table Side" | "Miyu Office Front 2" | "Miyu Office Front" | "Miyu Office Side 2" | "Miyu Office Side" | "Miyu Sofa Business Front" | "Miyu Sofa Business Side" | "Miyu Sofa Casual Front 2" | "Miyu Sofa Casual Front" | "Miyu Sofa Casual Side 2 " | "Miyu Sofa Casual Side" | "Nadim in Black blazer" | "Nadim in Blue blazer" | "Nadim in Blue jacket" | "Nadim in Puffer vest" | "Nadim in White shirt" | "Neil in Black Shirt" | "Neil in Black Suit " | "Neil in Yellow Jacket" | "Nico" | "Noah Lobby Front 2" | "Noah Lobby Front" | "Noah Lobby Side 2" | "Noah Lobby Side" | "Noah Office 
Front 2" | "Noah Office Front" | "Noah Office Side 2" | "Noah Office Side" | "Noah Sofa Front 2" | "Noah Sofa Front " | "Noah Sofa Side 2" | "Noah Sofa Side" | "Nour in Black blazer" | "Nour in Brown dress" | "Nour in Grey hoodie" | "Nour in Sporty vest" | "Nour in White shirt" | "Odelia in Blue Suit" | "Odelia in Red Dress " | "Odelia in Yellow Suit" | "Onat (Upper Body)" | "Onat Casual Front" | "Onat Casual Side" | "Onat Casual Sitting Front" | "Onat Casual Sitting Side" | "Onat Macbook Front 2" | "Onat Macbook Front" | "Onat Macbook Side 2" | "Onat Macbook Side" | "Onat Suit Front" | "Onat Suit Side" | "Onat Suit Sitting Front" | "Onat Suit Sitting Side" | "Onat iPad Front" | "Onat iPad Side" | "Onat iPad Sitting Front" | "Onat iPad Sitting Side" | "Oxana (Upper Body)" | "Oxana Gym Front 2" | "Oxana Gym Front" | "Oxana Gym Side 2" | "Oxana Gym Side" | "Oxana Office Front 2" | "Oxana Office Front" | "Oxana Office Side 2" | "Oxana Office Side" | "Oxana Sofa Front 2" | "Oxana Sofa Front" | "Oxana Sofa Side 2" | "Oxana Sofa Side" | "Oxana Yoga Front 2" | "Oxana Yoga Front" | "Oxana Yoga Side 2" | "Oxana Yoga Side" | "Patrizio Business Training Front" | "Patrizio Business Training Side" | "Patrizio Office Front" | "Patrizio Office Side 2" | "Patrizio Office Side" | "Patrizio Sofa Front" | "Patrizio Sofa Side" | "Piper Business Sofa Front" | "Piper Business Sofa Side" | "Piper Education Front" | "Piper Education Side" | "Rasmus Lounge Front 2" | "Rasmus Lounge Front" | "Rasmus Lounge Side 2" | "Rasmus Lounge Side" | "Rasmus Sofa Front 2" | "Rasmus Sofa Front" | "Rasmus Sofa Side 2" | "Rasmus Sofa Side" | "Raul (Upper Body)" | "Raul Business Sofa Front 2" | "Raul Business Sofa Front" | "Raul Business Sofa Side 2" | "Raul Business Sofa Side" | "Raul Casual Sofa Front 2" | "Raul Casual Sofa Front" | "Raul Casual Sofa Side 2" | "Raul Casual Sofa Side" | "Raul Casual Sofa no iPad Front" | "Raul Casual Sofa no iPad Side" | "Raul Casual Sofa with iPad Front" | "Raul Casual 
Sofa with iPad Side" | "Raul Office Front 2" | "Raul Office Front" | "Raul Office Side" | "Raul Sofa Front 2" | "Raul Sofa Front" | "Raul Sofa Side 2" | "Raul Sofa Side" | "Rebecca" | "Ren (Upper Body)" | "Ren Office Front 2" | "Ren Office Front" | "Ren Office Side 2" | "Ren Office Side" | "Ren Sofa Business Front" | "Ren Sofa Business Side" | "Ren Sofa Casual Front 2" | "Ren Sofa Casual Front" | "Ren Sofa Casual Side 2 " | "Ren Sofa Casual Side" | "Riley (Upper Body)" | "Riley Casual Front" | "Riley Casual Side" | "Riley Office Front" | "Riley Office Side" | "Roman Outdoor Sport Front" | "Roman Outdoor Sport Side" | "Sabine Office Front 2" | "Sabine Office Front" | "Sabine Office Side" | "Sabine Sofa Side" | "Salma in headscarf (Front)" | "Salma in headscarf (Left)" | "Salma in headscarf (Right)" | "Santa Avatar Present Standing " | "Santa Avatar Sitting 2" | "Santa Avatar Sitting" | "Santa Avatar Sitting Side" | "Santa Avatar Standing" | "Santa Fireplace Front" | "Santa Fireplace Side" | "Saskia in Blue blazer" | "Saskia in Blue shirt" | "Saskia in Green blazer" | "Saskia in Grey vest" | "Saskia in White blazer" | "Scarlett Couch Front 2" | "Scarlett Couch Front" | "Scarlett Couch Side 2" | "Scarlett Couch Side" | "Scarlett Fireplace Front" | "Scarlett Fireplace Side" | "Scarlett Hall Front" | "Scarlett Hall Side" | "Scarlett Yoga Front" | "Scarlett Yoga Side" | "Seema Business Front" | "Seema Business Side" | "Seema Business Sitting Side" | "Seema Casual Front" | "Seema Casual Side" | "Seema Casual Sitting Front" | "Seema Casual Sitting Side" | "Seema Nurse Front 2" | "Seema Nurse Front" | "Seema Nurse Side 2" | "Seema Nurse Side" | "Shawn Business Front" | "Shawn Business Side" | "Shawn Casual Sitting Front" | "Shawn Casual Sitting Side" | "Shawn Sitting Front" | "Shawn Sitting Side" | "Shawn Suit Front" | "Shawn Suit Side" | "Shawn Therapist" | "Shirley Business Front" | "Shirley Business Side" | "Shirley Casual Front 2" | "Shirley Casual Front" | "Shirley 
Casual Side" | "Shirley Casual Sitting Front 2" | "Shirley Casual Sitting Front" | "Shirley Casual Sitting Side 2" | "Shirley Casual Sitting Side" | "Shirley Education Front" | "Shirley Education Side 2" | "Shirley Education Side" | "Shirley Sitting Front" | "Shirley Sitting Side" | "Shirley Skirt Front" | "Shirley Skirt Side" | "Shirley Suit Front" | "Shirley Suit Side" | "Silas (Upper Body)" | "Silas Customer Support" | "Silas HR" | "Silas Lounge Front" | "Silas Lounge Side" | "Silas Sofa Side 2" | "Silas Sofa Side" | "Sloane in Blue dress" | "Sloane in Grey suit" | "Sloane in Pink sweater" | "Sophia in Black Shirt" | "Sophia in Suit " | "Sophia in White Suit" | "Sophie" | "Stacy in Doctor (Front)" | "Stacy in Doctor (Left)" | "Stacy in Doctor (Right)" | "Stacy in Dress (Front)" | "Stacy in Dress (Left)" | "Stacy in Dress (Right)" | "Stacy in Shirt (Front)" | "Stacy in Shirt (Left)" | "Stacy in Shirt (Right)" | "Susan in Black Shirt" | "Susan in Black Suit" | "Susan in Suit" | "Tahlia in Blue dress" | "Tahlia in Blue suit" | "Tahlia in Dark blue suit" | "Tahlia in Red suit" | "Tahlia in White shirt" | "Teodor (Upper Body)" | "Teodor Office Front 2" | "Teodor Office Front" | "Teodor Office Side 2" | "Teodor Office Side" | "Teodor Sofa Front 2" | "Teodor Sofa Front" | "Teodor Sofa Side 2" | "Teodor Sofa Side" | "Timothy (Upper Body)" | "Timothy Casual Front" | "Timothy Casual Side" | "Timothy Office Front" | "Timothy Office Side" | "Tito Casual Front 2" | "Tito Casual Front" | "Tito Casual Side 2" | "Tito Casual Side" | "Tito Casual Sitting Front" | "Tito Casual Sitting Side" | "Tito Coat Front 2" | "Tito Coat Front" | "Tito Coat Full Side" | "Tito Coat Side 2" | "Tito Coat Side" | "Tito Painter Front" | "Tito Painter Full Side" | "Tito Painter Side" | "Tito Painter Sitting Front" | "Tito Painter Sitting Side" | "Travis in Polo Shirt" | "Travis in Gray Suit" | "Travis in Black Suit" | "Trevor in Plaid Shirt" | "Trevor in Blue Shirt" | "Trevor in Blue Suit" | 
"Trevor in Suit" | "Tuba Business Chair Front" | "Tuba Business Chair Side" | "Tuba Business Front" | "Tuba Business Side" | "Tuba Business Sofa Front" | "Tuba Business Sofa Side" | "Tuba Casual Front" | "Tuba Casual Side" | "Tuba Casual Sitting Front" | "Tuba Casual Sitting Side" | "Tuba Macbook Front" | "Tuba Macbook Side" | "Veit Office Front" | "Veit Office Side" | "Veit Sofa Front" | "Veit Sofa Side" | "Vernon Office Front 2" | "Vernon Office Front" | "Vernon Office Side 2" | "Vernon Office Side" | "Verena Office Front" | "Verena Office Side" | "Verena Sofa Front" | "Verena Sofa Side" | "Vernon (Upper Body)" | "Vernon Lounge Front 2" | "Vernon Lounge Side 2" | "Vernon Lounge Side" | "Vince (Upper Body)" | "Vince Business Sofa Front" | "Vince Business Training Front" | "Vince Business Training Side 2" | "Vince Business Training Side" | "Vince Sofa Casual Front 2" | "Vince Sofa Casual Front" | "Vince Sofa Casual Side 2" | "Vince Sofa Casual Side" | "Violante Brown Suit Front 2" | "Violante Brown Suit Front " | "Violante Business Sitting Front" | "Violante Business Sitting Side" | "Violante Casual Sitting Front " | "Violante Casual Sitting Side" | "Violante Sport Front 2" | "Violante Sport Front" | "Violante Sport Side" | "Violante Sport Sitting Side" | "Violante Suit Front" | "Vivianna in Black shirt" | "Wade in Black Suit" | "Wade in Black Jacket" | "Wade in Gray Jacket" | "Yola Active Speaking Front" | "Yola Active Speaking Side" | "Yola Business Front" | "Yola Business Side" | "Yola Calm Speaking Front" | "Yola Calm Speaking Side" | "Yola Casual Front" | "Yola Casual Side" | "Yola Employee Badge Front" | "Yola Employee Badge Side" | "Zosia in Blue blazer" | "Zosia in Green dress" | "Zosia in Khaki blazer" | "Zosia in White dress" | "Zosia in Yellow shirt" | "Austin in Suit" | "Austin in Blue Casual Suit" | "Austin in Black Jacket" | "Austin in Blue Suit" | "Candace in Pink Blazer (Upper Body)" | "Chad in Blue Shirt (Upper Body)" | "Daisy in Suit" | "Daisy in 
Shirt" | "Daisy in Dress" | "Daisy in T-shirt" | "Francis in Blazer (Upper Body)" | "Matthew" | "Matthew in Suit" | "Matthew in Flowery Shirt" | "Matthew in Grey Sweater" | "Nik in Black Shirt" | "Nik in Blue Sweater" | "Tyler in Casual Suit" | "Tyler in Shirt" | "Tyler in Suit"; /** * Style of the avatar display Default value: `"closeUp"` */ avatar_style?: "normal" | "circle" | "closeUp"; }; /** * Request parameters for a chat completion: a required model name and prompt, plus optional generation settings. */ export type ChatInput = { /** * This sets the upper limit for the number of tokens the model can generate in response. It won't produce more than this limit. The maximum value is the context length minus the prompt length. */ max_tokens?: number; /** * Name of the model to use. Charged based on actual token usage. */ model: string; /** * Prompt to be used for the chat completion. */ prompt: string; /** * Whether reasoning should be part of the final answer. */ reasoning?: boolean; /** * System prompt to provide context or instructions to the model. */ system_prompt?: string; /** * This setting influences the variety in the model's responses. Lower values lead to more predictable and typical responses, while higher values encourage more diverse and less common responses. At 0, the model always gives the same response for a given input. Default value: `1` */ temperature?: number; }; /** * Result of a chat completion: the generated text plus an optional error message, partial-output flag, and usage details. */ export type ChatOutput = { /** * Error message if an error occurred. */ error?: string; /** * Generated output. */ output: string; /** * Whether the output is partial. */ partial?: boolean; /** * Usage information. */ usage?: CompletionUsage; }; /** * Input for Chatterbox speech-to-speech: a required source audio and an optional reference voice audio. */ export type ChatterboxSpeechToSpeechInput = { /** * Source audio, given as a URL string, Blob, or File. NOTE(review): no description upstream; presumably the speech to be converted - confirm. */ source_audio_url: string | Blob | File; /** * Optional URL to an audio file to use as a reference for the generated speech. If provided, the model will try to match the style and tone of the reference audio. */ target_voice_audio_url?: string | Blob | File; }; /** * Input for Chatterbox text-to-speech: the required `text` plus optional reference audio and generation settings. */ export type ChatterboxTextToSpeechInput = { /** * Optional URL to an audio file to use as a reference for the generated speech.
If provided, the model will try to match the style and tone of the reference audio. Default value: `"https://storage.googleapis.com/chatterbox-demo-samples/prompts/male_rickmorty.mp3"` */ audio_url?: string | Blob | File; /** * NOTE(review): no description upstream; presumably a guidance (CFG) weight - confirm. Default value: `0.5` */ cfg?: number; /** * Exaggeration factor for the generated speech (0.0 = no exaggeration, 1.0 = maximum exaggeration). Default value: `0.25` */ exaggeration?: number; /** * Useful to control the reproducibility of the generated audio. Assuming all other properties didn't change, a fixed seed should always generate the exact same audio file. Set to 0 for a random seed. */ seed?: number; /** * Temperature for generation (higher = more creative). Default value: `0.7` */ temperature?: number; /** * The text to be converted to speech (maximum 5000 characters). You can additionally add emotive tags. NOTE(review): the list of tag names is missing from this comment (it looks like angle-bracketed tags were stripped); consult the upstream API docs for the supported tags. */ text: string; }; /** * Input for multilingual Chatterbox text-to-speech: the required `text` plus optional voice/language selection and generation settings. */ export type ChatterboxTextToSpeechMultilingualInput = { /** * Configuration/pace weight controlling generation guidance (0.0-1.0). Use 0.0 for language transfer to mitigate accent inheritance. Default value: `0.5` */ cfg_scale?: number; /** * If using a custom audio URL, specify the language of the audio here. Ignored if voice is not a custom URL. */ custom_audio_language?: "english" | "arabic" | "danish" | "german" | "greek" | "spanish" | "finnish" | "french" | "hebrew" | "hindi" | "italian" | "japanese" | "korean" | "malay" | "dutch" | "norwegian" | "polish" | "portuguese" | "russian" | "swedish" | "swahili" | "turkish" | "chinese"; /** * Controls speech expressiveness and emotional intensity (0.25-2.0). 0.5 is neutral, higher values increase expressiveness. Extreme values may be unstable. Default value: `0.5` */ exaggeration?: number; /** * Random seed for reproducible results. Set to 0 for random generation, or provide a specific number for consistent outputs. */ seed?: number; /** * Controls randomness and variation in generation (0.05-5.0).
Higher values create more varied speech patterns. Default value: `0.8` */ temperature?: number; /** * The text to be converted to speech (maximum 300 characters). Supports 23 languages including English, French, German, Spanish, Italian, Portuguese, Hindi, Arabic, Chinese, Japanese, Korean, and more. */ text: string; /** * Language code for synthesis. When using a custom voice, provide the audio URL and select `custom_audio_language`. Default value: `"english"` */ voice?: string; }; /** * Input for the chromatic aberration effect: a required image plus optional per-channel (red, green, blue) shift amounts and directions. */ export type ChromaticAberrationInput = { /** * Blue channel shift direction. Default value: `"horizontal"` */ blue_direction?: "horizontal" | "vertical"; /** * Blue channel shift amount. */ blue_shift?: number; /** * Green channel shift direction. Default value: `"horizontal"` */ green_direction?: "horizontal" | "vertical"; /** * Green channel shift amount. */ green_shift?: number; /** * URL of the image to process. */ image_url: string | Blob | File; /** * Red channel shift direction. Default value: `"horizontal"` */ red_direction?: "horizontal" | "vertical"; /** * Red channel shift amount. */ red_shift?: number; }; /** * Input for a ChronoEdit image edit: a required image and prompt plus optional inference settings. */ export type ChronoEditInput = { /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * Whether to enable temporal reasoning. */ enable_temporal_reasoning?: boolean; /** * The guidance scale for the inference. Default value: `1` */ guidance_scale?: number; /** * The image to edit. */ image_url: string | Blob | File; /** * The number of inference steps to perform. Default value: `8` */ num_inference_steps?: number; /** * The number of temporal reasoning steps to perform. Default value: `8` */ num_temporal_reasoning_steps?: number; /** * The format of the output image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to edit the image. */ prompt: string; /** * The resolution of the output image.
Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * The seed for the inference. */ seed?: number; /** * Whether to return the image in sync mode. */ sync_mode?: boolean; /** * Enable turbo mode for faster inference. Default value: `true` */ turbo_mode?: boolean; }; /** * Input for a ChronoEdit image edit that can merge additional LoRAs: a required image and prompt plus optional inference settings. */ export type ChronoEditLoraInput = { /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * Whether to enable temporal reasoning. */ enable_temporal_reasoning?: boolean; /** * The guidance scale for the inference. Default value: `1` */ guidance_scale?: number; /** * The image to edit. */ image_url: string | Blob | File; /** * Optional additional LoRAs to merge for this request (max 3). Each entry gives the weights location and an optional scale. */ loras?: Array<ChronoLoraWeight>; /** * The number of inference steps to perform. Default value: `8` */ num_inference_steps?: number; /** * The number of temporal reasoning steps to perform. Default value: `8` */ num_temporal_reasoning_steps?: number; /** * The format of the output image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to edit the image. */ prompt: string; /** * The resolution of the output image. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * The seed for the inference. */ seed?: number; /** * Whether to return the image in sync mode. */ sync_mode?: boolean; /** * Enable turbo mode for faster inference. Default value: `true` */ turbo_mode?: boolean; }; /** * Result of a ChronoEdit request: the edited image(s) together with the prompt and seed used. */ export type ChronoEditOutput = { /** * The edited image. NOTE(review): `Array` is missing its type argument here and the element type cannot be determined from this part of the file - restore it from the upstream schema. */ images: Array; /** * The prompt used for the inference. */ prompt: string; /** * The seed for the inference. */ seed: number; }; /** * Input for ChronoEdit paintbrush editing: a required image and prompt, an optional mask, and optional inference settings. */ export type ChronoEditPaintBrushInput = { /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * Classifier-free guidance scale. Default value: `1` */ guidance_scale?: number; /** * The image to edit.
*/ image_url: string | Blob | File; /** * The scale factor for the LoRA adapter. Default value: `1` */ lora_scale?: number; /** * Optional additional LoRAs to merge (max 3). Each entry gives the weights location and an optional scale. */ loras?: Array<ChronoLoraWeight>; /** * Optional mask image where black areas indicate regions to sketch/paint. */ mask_url?: string | Blob | File; /** * Number of denoising steps to run. Default value: `8` */ num_inference_steps?: number; /** * The format of the output image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * Describe how to transform the sketched regions. */ prompt: string; /** * The resolution of the output image. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * The seed for the inference. */ seed?: number; /** * Whether to return the image in sync mode. */ sync_mode?: boolean; /** * Enable turbo mode for faster inference. Default value: `true` */ turbo_mode?: boolean; }; /** * Input for the ChronoEdit upscaler: a required image plus optional LoRA, inference, and upscale-factor settings. */ export type ChronoEditUpscalerInput = { /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The guidance scale for the inference. Default value: `1` */ guidance_scale?: number; /** * The image to upscale. */ image_url: string | Blob | File; /** * The scale factor for the LoRA adapter. Default value: `1` */ lora_scale?: number; /** * Optional additional LoRAs to merge (max 3). Each entry gives the weights location and an optional scale. */ loras?: Array<ChronoLoraWeight>; /** * Number of inference steps for the upscaling pass. Default value: `30` */ num_inference_steps?: number; /** * The format of the output image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png" | "webp"; /** * The seed for the inference. */ seed?: number; /** * Whether to return the image in sync mode. */ sync_mode?: boolean; /** * Target scale factor for the output resolution. Default value: `2` */ upscale_factor?: number; }; /** * A LoRA to merge: where its weights live and how strongly to apply it. */ export type ChronoLoraWeight = { /** * URL or path to the LoRA weights (Safetensors). */ path: string; /** * Scale factor controlling LoRA strength.
Default value: `1` */ scale?: number; }; /** * Input for blending a person photo into a city scene: a required person image and city name plus optional background image and framing options. */ export type CityTeleportInput = { /** * Aspect ratio for 4K output. */ aspect_ratio?: AspectRatio; /** * Camera angle for the shot. Default value: `"eye_level"` */ camera_angle?: "eye_level" | "low_angle" | "high_angle" | "dutch_angle" | "birds_eye_view" | "worms_eye_view" | "overhead" | "side_angle"; /** * Optional city background image URL. When provided, the person will be blended into this custom scene. */ city_image_url?: string | Blob | File; /** * City name (used when city_image_url is not provided). Note that this property is required by the type even when city_image_url is supplied. */ city_name: string; /** * Person photo URL. */ person_image_url: string | Blob | File; /** * Type of photo shot. Default value: `"medium_shot"` */ photo_shot?: "extreme_close_up" | "close_up" | "medium_close_up" | "medium_shot" | "medium_long_shot" | "long_shot" | "extreme_long_shot" | "full_body"; }; /** * Input for the Clarity upscaler: a required image plus optional prompt, guidance, and upscaling settings. */ export type ClarityUpscalerInput = { /** * The creativity of the model. The higher the creativity, the more the model will deviate from the prompt. * Refers to the denoise strength of the sampling. Default value: `0.35` */ creativity?: number; /** * If set to false, the safety checker will be disabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * The URL of the image to upscale. */ image_url: string | Blob | File; /** * The negative prompt to use. Use it to address details that you don't want in the image. Default value: `"(worst quality, low quality, normal quality:2)"` */ negative_prompt?: string; /** * The number of inference steps to perform. Default value: `18` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results.
Default value: `"masterpiece, best quality, highres"` */ prompt?: string; /** * The resemblance of the upscaled image to the original image. The higher the resemblance, the more the model will try to keep the original image. * Refers to the strength of the ControlNet. Default value: `0.6` */ resemblance?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The upscale factor. Default value: `2` */ upscale_factor?: number; }; /** * Result of the Clarity upscaler: the generated image, the seed used, and workflow timings. */ export type ClarityUpscalerOutput = { /** * The URL of the generated image. */ image: Image; /** * The seed used to generate the image. */ seed: number; /** * The timings of the different steps in the workflow. */ timings: unknown; }; /** * Input for codeformer: a required image plus optional face alignment, restoration, fidelity, and upscaling settings. */ export type codeformerInput = { /** * Whether faces should be aligned. */ aligned?: boolean; /** * Whether faces should be upscaled. Default value: `true` */ face_upscale?: boolean; /** * Weight of the fidelity factor. Default value: `0.5` */ fidelity?: number; /** * URL of image to be used for relighting. NOTE(review): "relighting" looks copied from another endpoint; the other fields here concern face restoration - confirm. */ image_url: string | Blob | File; /** * Whether only the center face should be restored. */ only_center_face?: boolean; /** * Random seed for reproducible generation. */ seed?: number; /** * Upscaling factor. Default value: `2` */ upscale_factor?: number; }; /** * Input for Cogvideox5b image-to-video generation: a required image and prompt plus optional generation settings. */ export type Cogvideox5bImageToVideoInput = { /** * The target FPS of the video. Default value: `16` */ export_fps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related video to show you. Default value: `7` */ guidance_scale?: number; /** * The URL to the image to generate the video from. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. We currently support one lora. NOTE(review): `Array` is missing its type argument here and the element type is not visible in this part of the file - restore it from the upstream schema. */ loras?: Array; /** * The negative prompt to generate video from. Default value: `""` */ negative_prompt?: string; /** * The number of inference steps to perform.
Default value: `50`
   */
  num_inference_steps?: number;
  /** The prompt to generate the video from. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same video every time.
   */
  seed?: number;
  /** Use RIFE for video interpolation Default value: `true` */
  use_rife?: boolean;
  /** The size of the generated video. */
  video_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
};
export type Cogvideox5bInput = {
  /** The target FPS of the video Default value: `16` */
  export_fps?: number;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related video to show you. Default value: `7`
   */
  guidance_scale?: number;
  /**
   * The LoRAs to use for the image generation. We currently support one lora.
   *
   * NOTE(review): the element type argument was missing here (a bare `Array` does not
   * type-check). `unknown` is a placeholder; presumably this should be the project's
   * LoRA weight type. Confirm and tighten.
   */
  loras?: Array<unknown>;
  /** The negative prompt to generate video from Default value: `""` */
  negative_prompt?: string;
  /** The number of inference steps to perform. Default value: `50` */
  num_inference_steps?: number;
  /** The prompt to generate the video from. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same video every time.
   */
  seed?: number;
  /** Use RIFE for video interpolation Default value: `true` */
  use_rife?: boolean;
  /** The size of the generated video. */
  video_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
};
export type Cogvideox5bOutput = {
  /** The prompt used for generating the video. */
  prompt: string;
  /**
   * Seed of the generated video. It will be the same value of the one passed in the
   * input or the randomly generated that was used in case none was passed.
*/
  seed: number;
  /** No description provided. */
  timings: unknown;
  /** The URL to the generated video */
  video: File;
};
export type Cogvideox5bVideoToVideoInput = {
  /** The target FPS of the video Default value: `16` */
  export_fps?: number;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related video to show you. Default value: `7`
   */
  guidance_scale?: number;
  /**
   * The LoRAs to use for the image generation. We currently support one lora.
   *
   * NOTE(review): the element type argument was missing here (a bare `Array` does not
   * type-check). `unknown` is a placeholder; presumably this should be the project's
   * LoRA weight type. Confirm and tighten.
   */
  loras?: Array<unknown>;
  /** The negative prompt to generate video from Default value: `""` */
  negative_prompt?: string;
  /** The number of inference steps to perform. Default value: `50` */
  num_inference_steps?: number;
  /** The prompt to generate the video from. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same video every time.
   */
  seed?: number;
  /** The strength to use for Video to Video. 1.0 completely remakes the video while 0.0 preserves the original. Default value: `0.8` */
  strength?: number;
  /** Use RIFE for video interpolation Default value: `true` */
  use_rife?: boolean;
  /** The size of the generated video. */
  video_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** The video to generate the video from. */
  video_url: string | Blob | File;
};
export type cogview4Input = {
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image.
Default value: `landscape_4_3`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `50` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"jpeg"` */
  output_format?: "jpeg" | "png";
  /** The prompt to generate an image from. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
};
export type Color = {
  /** The blue component of the color (0-255). */
  blue: number;
  /** The green component of the color (0-255). */
  green: number;
  /** The name of the color. */
  name?: string;
  /** The red component of the color (0-255). */
  red: number;
};
export type ColorCorrectionInput = {
  /** Brightness adjustment */
  brightness?: number;
  /** Contrast adjustment */
  contrast?: number;
  /** Gamma adjustment Default value: `1` */
  gamma?: number;
  /** URL of image to process */
  image_url: string | Blob | File;
  /** Saturation adjustment */
  saturation?: number;
  /** Color temperature adjustment */
  temperature?: number;
};
export type ColorizeInput = {
  /** Select the color palette or aesthetic for the output image */
  color:
    | "contemporary color"
    | "vivid color"
    | "black and white colors"
    | "sepia vintage";
  /**
   * The source image.
*/
  image_url: string | Blob | File;
};
export type ColorPalette = {
  /** A list of color palette members that define the color palette */
  members?: Array<ColorPaletteMember>;
  /** A color palette preset value */
  name?:
    | "EMBER"
    | "FRESH"
    | "JUNGLE"
    | "MAGIC"
    | "MELON"
    | "MOSAIC"
    | "PASTEL"
    | "ULTRAMARINE";
};
export type ColorPaletteMember = {
  /** The weight of the color in the color palette Default value: `0.5` */
  color_weight?: number;
  /** RGB color value for the palette member */
  rgb: RGBColor;
};
export type ColorTintInput = {
  /** URL of image to process */
  image_url: string | Blob | File;
  /** Tint color mode Default value: `"sepia"` */
  tint_mode?:
    | "sepia"
    | "red"
    | "green"
    | "blue"
    | "cyan"
    | "magenta"
    | "yellow"
    | "purple"
    | "orange"
    | "warm"
    | "cool"
    | "lime"
    | "navy"
    | "vintage"
    | "rose"
    | "teal"
    | "maroon"
    | "peach"
    | "lavender"
    | "olive";
  /** Tint strength Default value: `1` */
  tint_strength?: number;
};
export type CombineInput = {
  /** URL of the audio file to use as the audio track */
  audio_url: string | Blob | File;
  /** Offset in seconds for when the audio should start relative to the video */
  start_offset?: number;
  /** URL of the video file to use as the video track */
  video_url: string | Blob | File;
};
export type ComfyInput = {
  /**
   * Disable saving prompt metadata in files.
*/
  disable_metadata?: boolean;
  /** No description provided. */
  extra_data?: unknown;
  /** No description provided. */
  prompt: unknown;
};
export type CompletionUsage = {
  /** Number of tokens in the completion */
  completion_tokens: number;
  /** Number of tokens in the prompt */
  prompt_tokens: number;
  /** Total tokens used */
  total_tokens: number;
};
export type ComposeOutput = {
  /** URL of the video's thumbnail image */
  thumbnail_url: string | Blob | File;
  /** URL of the processed video file */
  video_url: string | Blob | File;
};
export type CompressImageInput = {
  /** The URL of the image to compress */
  image_url: string | Blob | File;
  /** Maximum height in pixels (resizes if larger, maintains aspect ratio) */
  max_height?: number;
  /** Maximum width in pixels (resizes if larger, maintains aspect ratio) Default value: `470` */
  max_width?: number;
  /** Apply additional optimization (slightly slower but better compression) Default value: `true` */
  optimize?: boolean;
  /** Output format (jpg recommended for compression) Default value: `"jpg"` */
  output_format?: "jpg" | "jpeg" | "webp" | "png";
  /** Compression quality (1-100, higher = better quality, larger file) Default value: `99` */
  quality?: number;
};
export type CompressImageOutput = {
  /** Compressed image size in bytes */
  compressed_size: number;
  /** Compression ratio (compressed/original) */
  compression_ratio: number;
  /** Compressed image */
  image: Image;
  /** Original image size in bytes */
  original_size: number;
};
export type ConcatImageInput = {
  /** Alignment of images Default value: `"center"` */
  alignment?: "start" | "center" | "end";
  /** Background color for spacing Default value: `"white"` */
  background_color?:
    | "white"
    | "black"
    | "red"
    | "green"
    | "blue"
    | "yellow"
    | "orange"
    | "purple"
    | "pink"
    | "brown"
    | "gray"
    | "cyan"
    | "magenta"
    | "transparent";
  /** Direction of concatenation Default value: `"horizontal"` */
  direction?: "horizontal" | "vertical";
  /**
   * List of image URLs to concatenate
   *
   * NOTE(review): the element type argument was missing (a bare `Array` does not type-check).
   * `string` follows from the "image URLs" wording; confirm whether Blob/File entries
   * should also be accepted, as the single-URL fields in this file allow.
   */
  image_urls: Array<string>;
  /**
Output format for the concatenated image Default value: `"png"`
   */
  output_format?: "png" | "jpg" | "jpeg" | "webp";
  /** Spacing between images in pixels */
  spacing?: number;
};
export type ControlLoraWeight = {
  /** URL of the image to be used as the control image. */
  control_image_url: string | Blob | File;
  /** URL or the path to the LoRA weights. */
  path: string;
  /** Type of preprocessing to apply to the input image. Default value: `"None"` */
  preprocess?: "canny" | "depth" | "None";
  /**
   * The scale of the LoRA weight. This is used to scale the LoRA weight
   * before merging it with the base model. Providing a dictionary as {"layer_name":layer_scale} allows per-layer lora scale settings. Layers with no scale provided will have scale 1.0. Default value: `1`
   *
   * NOTE(review): `unknown | number` collapses to plain `unknown` in TypeScript, so this
   * property is effectively unchecked. Consider a dictionary-of-numbers type if the
   * per-layer form described above is the only non-number input.
   */
  scale?: unknown | number;
};
export type ControlNet = {
  /**
   * The scale of the control net weight. This is used to scale the control net weight
   * before merging it with the base model. Default value: `1`
   */
  conditioning_scale?: number;
  /** optional URL to the controlnet config.json file. */
  config_url?: string | Blob | File;
  /** URL of the image to be used as the control image. */
  control_image_url: string | Blob | File;
  /** The percentage of the image to end applying the controlnet in terms of the total timesteps. Default value: `1` */
  end_percentage?: number;
  /** URL of the mask for the control image. */
  mask_image_url?: string | Blob | File;
  /** Threshold for mask. Default value: `0.5` */
  mask_threshold?: number;
  /** URL or the path to the control net weights. */
  path: string;
  /** The percentage of the image to start applying the controlnet in terms of the total timesteps. */
  start_percentage?: number;
  /** The optional variant if a Hugging Face repo key is used. */
  variant?: string;
};
export type controlnetsdxlInput = {
  /**
   * The scale of the ControlNet.
Default value: `0.5`
   */
  controlnet_conditioning_scale?: number;
  /** Url to input image */
  image_url: string | Blob | File;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Increasing the amount of steps tells Stable Diffusion that it should take more steps
   * to generate your final result which can increase the amount of detail in your image. Default value: `50`
   */
  num_inference_steps?: number;
  /** The prompt to use for generating the image. Be as descriptive as possible for best results. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
};
export type ControlNetUnion = {
  /** optional URL to the controlnet config.json file. */
  config_url?: string | Blob | File;
  /**
   * The control images and modes to use for the control net.
   *
   * NOTE(review): the element type argument was missing (a bare `Array` does not type-check).
   * `ControlNetUnionInput` is the type in this file that carries a control image and a
   * control mode; confirm against the upstream schema.
   */
  controls: Array<ControlNetUnionInput>;
  /** URL or the path to the control net weights. */
  path: string;
  /** The optional variant if a Hugging Face repo key is used. */
  variant?: string;
};
export type ControlNetUnionInput = {
  /**
   * The scale of the control net weight. This is used to scale the control net weight
   * before merging it with the base model. Default value: `1`
   */
  conditioning_scale?: number;
  /** URL of the image to be used as the control image. */
  control_image_url: string | Blob | File;
  /**
   * Control Mode for Flux Controlnet Union. Supported values are:
   * - canny: Uses the edges for guided generation.
   * - tile: Uses the tiles for guided generation.
   * - depth: Utilizes a grayscale depth map for guided generation.
   * - blur: Adds a blur to the image.
   * - pose: Uses the pose of the image for guided generation.
   * - gray: Converts the image to grayscale.
   * - low-quality: Converts the image to a low-quality image.
*/
  control_mode:
    | "canny"
    | "tile"
    | "depth"
    | "blur"
    | "pose"
    | "gray"
    | "low-quality";
  /** The percentage of the image to end applying the controlnet in terms of the total timesteps. Default value: `1` */
  end_percentage?: number;
  /** URL of the mask for the control image. */
  mask_image_url?: string | Blob | File;
  /** Threshold for mask. Default value: `0.5` */
  mask_threshold?: number;
  /** The percentage of the image to start applying the controlnet in terms of the total timesteps. */
  start_percentage?: number;
};
export type controlnextInput = {
  /** Number of frames to process in each batch. Default value: `24` */
  batch_frames?: number;
  /** Condition scale for ControlNeXt. Default value: `1` */
  controlnext_cond_scale?: number;
  /** Chunk size for decoding frames. Default value: `2` */
  decode_chunk_size?: number;
  /** Frames per second for the output video. Default value: `7` */
  fps?: number;
  /** Guidance scale for the diffusion process. Default value: `3` */
  guidance_scale?: number;
  /** Height of the output video. Default value: `1024` */
  height?: number;
  /** URL of the reference image. */
  image_url: string | Blob | File;
  /** Maximum number of frames to process. Default value: `240` */
  max_frame_num?: number;
  /** Motion bucket ID for the pipeline. Default value: `127` */
  motion_bucket_id?: number;
  /** Number of inference steps. Default value: `25` */
  num_inference_steps?: number;
  /** Number of overlapping frames between batches. Default value: `6` */
  overlap?: number;
  /** Stride for sampling frames from the input video. Default value: `2` */
  sample_stride?: number;
  /** URL of the input video. */
  video_url: string | Blob | File;
  /** Width of the output video. Default value: `576` */
  width?: number;
};
export type ConversationStreamInput = {
  /** URL to the input audio file (user's speech). */
  audio_url: string | Blob | File;
  /**
   * Output audio format. 'wav' for standard audio with headers, 'pcm' for raw s16le PCM (lowest latency).
Default value: `"wav"`
   */
  output_format?: "wav" | "pcm";
  /** Text prompt describing the AI persona and conversation context. Default value: `"You are a wise and friendly teacher. Answer questions or provide advice in a clear and engaging way."` */
  prompt?: string;
  /** Random seed for reproducibility. */
  seed?: number;
  /** Audio sampling temperature. Higher values produce more diverse outputs. Default value: `0.8` */
  temperature_audio?: number;
  /** Text sampling temperature. Higher values produce more diverse outputs. Default value: `0.7` */
  temperature_text?: number;
  /** Top-K sampling for audio tokens. Default value: `250` */
  top_k_audio?: number;
  /** Top-K sampling for text tokens. Default value: `25` */
  top_k_text?: number;
  /** Voice ID for the AI response. NAT = natural, VAR = variety. F = female, M = male. Ignored when voice_audio_url is provided. Default value: `"NATF2"` */
  voice?:
    | "NATF0"
    | "NATF1"
    | "NATF2"
    | "NATF3"
    | "NATM0"
    | "NATM1"
    | "NATM2"
    | "NATM3"
    | "VARF0"
    | "VARF1"
    | "VARF2"
    | "VARF3"
    | "VARF4"
    | "VARM0"
    | "VARM1"
    | "VARM2"
    | "VARM3"
    | "VARM4";
  /** URL to a voice sample audio for on-the-fly voice cloning. When provided, the AI responds in the cloned voice instead of the preset 'voice'. 10+ seconds of clear speech recommended. Billed at 2x rate. */
  voice_audio_url?: string | Blob | File;
};
export type ConvertFormatInput = {
  /** URL of 3D file to convert (FBX, OBJ, GLB). Max size: 60MB. */
  input_file_url: string | Blob | File;
  /** Target output format. STL: 3D printing. USDZ: AR/iOS. FBX: animation. MP4/GIF: preview video/animation. */
  output_format: "STL" | "USDZ" | "FBX" | "MP4" | "GIF";
};
export type ConvertFormatOutput = {
  /** Converted file in the requested format. */
  result_file: File;
};
export type Coordinates = {
  /** Height of the product in the image. */
  height: number;
  /** Width of the product in the image. */
  width: number;
  /**
   * X coordinate of the product in the image.
*/
  x: number;
  /** Y coordinate of the product in the image. */
  y: number;
};
export type CosmosPredict25DistilledTextToVideoInput = {
  /** A negative prompt to guide generation away from undesired content. Default value: `"The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality."` */
  negative_prompt?: string;
  /** Number of frames to generate. Must be between 9 and 93. Default value: `93` */
  num_frames?: number;
  /** Number of denoising steps. Distilled model works well with fewer steps. Default value: `10` */
  num_inference_steps?: number;
  /** The text prompt describing the video to generate. */
  prompt: string;
  /** Random seed for reproducible generation. */
  seed?: number;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
  /** The format of the output video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** The quality of the output video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
};
export type CosmosPredict25ImageToVideoInput = {
  /** Classifier-free guidance scale. Higher values increase prompt adherence. Default value: `7` */
  guidance_scale?: number;
  /** URL of the input image to use as first frame. */
  image_url: string | Blob | File;
  /**
   * A negative prompt to guide generation away from undesired content.
Default value: `"The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality."`
   */
  negative_prompt?: string;
  /** Number of frames to generate. Must be between 9 and 93. Default value: `93` */
  num_frames?: number;
  /** Number of denoising steps. More steps yield higher quality but take longer. Default value: `35` */
  num_inference_steps?: number;
  /** The text prompt describing the video to generate. */
  prompt: string;
  /** Random seed for reproducible generation. */
  seed?: number;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
  /** The format of the output video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** The quality of the output video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
};
export type CosmosPredict25TextToVideoInput = {
  /** Classifier-free guidance scale. Higher values increase prompt adherence. Default value: `7` */
  guidance_scale?: number;
  /**
   * A negative prompt to guide generation away from undesired content.
Default value: `"The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality."`
   */
  negative_prompt?: string;
  /** Number of frames to generate. Must be between 9 and 93. Default value: `93` */
  num_frames?: number;
  /** Number of denoising steps. More steps yield higher quality but take longer. Default value: `35` */
  num_inference_steps?: number;
  /** The text prompt describing the video to generate. */
  prompt: string;
  /** Random seed for reproducible generation. */
  seed?: number;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
  /** The format of the output video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** The quality of the output video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
};
export type CosmosPredict25VideoToVideoInput = {
  /** Classifier-free guidance scale. Higher values increase prompt adherence. Default value: `7` */
  guidance_scale?: number;
  /**
   * A negative prompt to guide generation away from undesired content.
Default value: `"The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality."`
   */
  negative_prompt?: string;
  /** Number of frames to generate. Must be between 9 and 93. Default value: `93` */
  num_frames?: number;
  /** Number of denoising steps. More steps yield higher quality but take longer. Default value: `35` */
  num_inference_steps?: number;
  /** The text prompt describing the video to generate. */
  prompt: string;
  /** Random seed for reproducible generation. */
  seed?: number;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
  /** The format of the output video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** The quality of the output video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
  /** URL of the input video to use as conditioning. */
  video_url: string | Blob | File;
};
export type CreateCharacterInput = {
  /** Name for the character (1–80 characters). Refer to this name in prompts when using the character. */
  name: string;
  /**
   * URL of an MP4 video (minimum 720p, max ~2.67:1 aspect ratio) to define the character. Videos exceeding 1080p are automatically scaled down. Non-standard aspect ratios are automatically padded to 16:9 (landscape) or 9:16 (portrait). Videos longer than 4 seconds are trimmed to the first 4 seconds.
*/
  video_url: string | Blob | File;
};
export type CreateCharacterOutput = {
  /** API character ID (format char_...). Use this in character_ids when generating video. */
  id: string;
  /** The character name */
  name: string;
};
export type CreateVoiceInput = {
  /** URL of the voice audio file. Supports .mp3/.wav audio or .mp4/.mov video. Duration must be 5-30 seconds with clean, single-voice audio. */
  voice_url: string | Blob | File;
};
export type CreateVoiceOutput = {
  /** Unique identifier for the created voice */
  voice_id: string;
};
export type CreatifyAuroraInput = {
  /** Guidance scale to be used for audio adherence. Default value: `2` */
  audio_guidance_scale?: number;
  /** The URL of the audio file to be used for video generation. */
  audio_url: string | Blob | File;
  /** Guidance scale to be used for text prompt adherence. Default value: `1` */
  guidance_scale?: number;
  /** The URL of the image file to be used for video generation. */
  image_url: string | Blob | File;
  /** A text prompt to guide the video generation process. */
  prompt?: string;
  /** The resolution of the generated video. Default value: `"720p"` */
  resolution?: "480p" | "720p";
};
export type CreativeUpscalerInput = {
  /** The URL to the additional embeddings to use for the upscaling. Default is None */
  additional_embedding_url?: string | Blob | File;
  /** The scale of the additional LORA model to use for the upscaling. Default is 1.0 Default value: `1` */
  additional_lora_scale?: number;
  /**
   * The URL to the additional LORA model to use for the upscaling.
Default is None
   */
  additional_lora_url?: string | Blob | File;
  /** The URL to the base model to use for the upscaling */
  base_model_url?: string | Blob | File;
  /** How much the output can deviate from the original Default value: `0.5` */
  creativity?: number;
  /** How much detail to add Default value: `1` */
  detail?: number;
  /**
   * If set to true, the resulting image will be checked whether it includes any
   * potentially unsafe content. If it does, it will be replaced with a black
   * image. Default value: `true`
   */
  enable_safety_checks?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
   */
  guidance_scale?: number;
  /** The image to upscale. */
  image_url: string | Blob | File;
  /** The type of model to use for the upscaling. Default is SD_1_5 Default value: `"SD_1_5"` */
  model_type?: "SD_1_5" | "SDXL";
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `"blurry, low resolution, bad, ugly, low quality, pixelated, interpolated, compression artifacts, noisey, grainy"`
   */
  negative_prompt?: string;
  /**
   * The number of inference steps to use for generating the image. The more steps
   * the better the image will be but it will also take longer to generate. Default value: `20`
   */
  num_inference_steps?: number;
  /** Allow for large uploads that could take a very long time. */
  override_size_limits?: boolean;
  /** The prompt to use for generating the image. Be as descriptive as possible for best results. If no prompt is provided BLIP2 will be used to generate a prompt. */
  prompt?: string;
  /**
   * The suffix to add to the prompt. This is useful to add a common ending to all prompts such as 'high quality' etc or embedding tokens.
Default value: `" high quality, highly detailed, high resolution, sharp"`
   */
  prompt_suffix?: string;
  /** The scale of the output image. The higher the scale, the bigger the output image will be. Default value: `2` */
  scale?: number;
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /** How much to preserve the shape of the original image Default value: `0.25` */
  shape_preservation?: number;
  /**
   * If set to true, the image will not be processed by the CCSR model before
   * being processed by the creativity model.
   */
  skip_ccsr?: boolean;
};
export type CropImageInput = {
  /** Height as percentage of image height (0-100) Default value: `100` */
  height_percent?: number;
  /** The URL of the image to crop */
  image_url: string | Blob | File;
  /** Output format for the cropped image Default value: `"png"` */
  output_format?: "png" | "jpg" | "jpeg" | "webp";
  /** Width as percentage of image width (0-100) Default value: `100` */
  width_percent?: number;
  /** X coordinate as percentage of image width (0-100) */
  x_percent?: number;
  /** Y coordinate as percentage of image height (0-100) */
  y_percent?: number;
};
export type CrystalUpscaleInput = {
  /** Creativity level for upscaling */
  creativity?: number;
  /** URL to the input image */
  image_url: string | Blob | File;
  /** Scale factor Default value: `2` */
  scale_factor?: number;
};
export type CrystalVideoUpscaleInput = {
  /** Scale factor. The scale factor must be chosen such that the upscaled video does not exceed 5K resolution. Default value: `2` */
  scale_factor?: number;
  /** URL to the input video. */
  video_url: string | Blob | File;
};
export type Csm1bInput = {
  /**
   * The context to generate an audio from.
   *
   * NOTE(review): the element type argument was missing (a bare `Array` does not type-check).
   * `unknown` is a placeholder; confirm the element type against the upstream schema.
   */
  context?: Array<unknown>;
  /**
   * The text to generate an audio from.
   *
   * NOTE(review): the element type argument was missing (a bare `Array` does not type-check).
   * `unknown` is a placeholder; confirm the element type against the upstream schema.
   */
  scene: Array<unknown>;
};
export type Csm1bOutput = {
  /**
   * The generated audio.
*/
  audio: File | string;
};
export type DavinciMagihumanInput = {
  /** Optional URL of the driving audio for lipsync mode. If omitted, audio is generated from the prompt. */
  audio_url?: string | Blob | File;
  /** Duration of the generated video in seconds. Default value: `5` */
  duration?: number;
  /** If enabled, runs safety checks on the prompt and input image. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Classifier-free guidance scale. Default value: `5` */
  guidance_scale?: number;
  /** URL of the reference image for image-to-video generation. */
  image_url: string | Blob | File;
  /** Number of denoising steps. Defaults to 8 for 256p and 32 for 540p/720p/1080p (base + SR). */
  num_inference_steps?: number;
  /** Text prompt describing the desired video content. */
  prompt: string;
  /** Output resolution. '256p' uses the official base-model 448x256 path. '1080p' uses the official base + 1080p super-resolution pipeline. '540p' and '720p' reuse that sharper 1080p SR path and downsample to the requested output size. Default value: `"256p"` */
  resolution?: "256p" | "540p" | "720p" | "1080p";
  /** Random seed for reproducibility. */
  seed?: number;
};
export type DavinciMagihumanOutput = {
  /** The seed used for generation. */
  seed: number;
  /** The generated video with synchronized audio. */
  video?: File;
};
export type deepfilternet3Output = {
  /** The audio file that was enhanced. */
  audio_file: AudioFile;
  /** Timings for each step in the pipeline. */
  timings: DeepFilterNetTimings;
};
export type demucsInput = {
  /** URL of the audio file to separate into stems */
  audio_url: string | Blob | File;
  /** Demucs model to use for separation Default value: `"htdemucs_6s"` */
  model?:
    | "htdemucs"
    | "htdemucs_ft"
    | "htdemucs_6s"
    | "hdemucs_mmi"
    | "mdx"
    | "mdx_extra"
    | "mdx_q"
    | "mdx_extra_q";
  /** Output audio format for the separated stems Default value: `"mp3"` */
  output_format?: "wav" | "mp3";
  /**
   * Overlap between segments (0.0 to 1.0).
Higher values may improve quality but increase processing time. Default value: `0.25`
   */
  overlap?: number;
  /** Length in seconds of each segment for processing. Smaller values use less memory but may reduce quality. Default is model-specific. */
  segment_length?: number;
  /** Number of random shifts for equivariant stabilization. Higher values improve quality but increase processing time. Default value: `1` */
  shifts?: number;
  /** Specific stems to extract. If None, extracts all available stems. Available stems depend on model: vocals, drums, bass, other, guitar, piano (for 6s model) */
  stems?: Array<"vocals" | "drums" | "bass" | "other" | "guitar" | "piano">;
};
export type demucsOutput = {
  /** Separated bass audio file */
  bass?: File;
  /** Separated drums audio file */
  drums?: File;
  /** Separated guitar audio file (only available for 6s models) */
  guitar?: File;
  /** Separated other instruments audio file */
  other?: File;
  /** Separated piano audio file (only available for 6s models) */
  piano?: File;
  /** Separated vocals audio file */
  vocals?: File;
};
export type DepthAnythingVideoInput = {
  /** Colormap for depth visualization. 'turbo' (recommended) shows near=warm, far=cool. 'grayscale' for raw normalized depth. 'inferno'/'magma' for perceptually uniform. 'viridis' for colorblind-friendly. Default value: `"grayscale"` */
  colormap?: "grayscale" | "turbo" | "inferno" | "magma" | "viridis";
  /** Export raw float32 depths as .npz file with: 'depths' [N,H,W], 'min_depth', 'max_depth', 'fps', 'model', 'shape'. */
  include_raw_depths?: boolean;
  /** Max frames to process. None = all frames. */
  max_frames?: number;
  /** Depth estimation model size. VDA-Large = best quality, VDA-Small = fastest. Default value: `"VDA-Large"` */
  model?: "VDA-Small" | "VDA-Base" | "VDA-Large";
  /** Output video FPS. None = same as input. */
  output_fps?: number;
  /**
   * Output resolution. 'auto' preserves input (max 1080p). Options: 'auto', '360p', '480p', '720p', '1080p'.
Default value: `"auto"` */ resolution?: "auto" | "360p" | "480p" | "720p" | "1080p"; /** * Output a side-by-side comparison video (original | depth). */ side_by_side?: boolean; /** * URL of the input video to estimate depth for. */ video_url: string | Blob | File; }; export type DepthAnythingVideoOutput = { /** * Raw depth values as .npz (if include_raw_depths=true). */ raw_depths?: File; /** * Depth visualization video (MP4, H.264). */ video: File; }; export type DepthLoraInput = { /** * The image to use for control lora. This is used to control the style of the generated image. */ control_lora_image_url: string | Blob | File; /** * The strength of the control lora. Default value: `1` */ control_lora_strength?: number; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to use for image-to-image generation. */ image_url?: string | Blob | File; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ loras?: Array; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * If set to true, the input image will be preprocessed to extract depth information. * This is useful for generating depth maps from images. 
Default value: `true` */ preprocess_depth?: boolean; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type DepthMapInput = { /** * a. Default value: `6.283185307179586` (numerically 2*pi). * NOTE(review): the meaning of this parameter is not described upstream - confirm. */ a?: number; /** * bg_th (presumably a background threshold - confirm). Default value: `0.1` */ bg_th?: number; /** * depth_and_normal. NOTE(review): not described upstream - confirm what enabling this returns. */ depth_and_normal?: boolean; /** * Input image url. */ image_url: string | Blob | File; }; export type DesaturateInput = { /** * Desaturation factor. Default value: `1` */ desaturate_factor?: number; /** * Desaturation method. Default value: `"luminance (Rec.709)"` */ desaturate_method?: "luminance (Rec.709)" | "luminance (Rec.601)" | "average" | "lightness"; /** * URL of image to process. */ image_url: string | Blob | File; }; export type DetectionInput = { /** * Whether to combine points into a single point for point detection. This has no effect for bbox detection or gaze detection. */ combine_points?: boolean; /** * Text description of what to detect. */ detection_prompt: string; /** * Image URL to be processed. */ image_url: string | Blob | File; /** * Whether to show visualization for detection. Default value: `true` */ show_visualization?: boolean; /** * Type of detection to perform. */ task_type: "bbox_detection" | "point_detection" | "gaze_detection"; /** * Whether to use ensemble for gaze detection. */ use_ensemble?: boolean; }; export type DetectionOutput = { /** * Output image with detection visualization. */ image?: Image; /** * Detection results as text. */ text_output: string; }; export type DevImageToImageInput = { /** * If set to true, the safety checker will be enabled. 
Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The URL of the image to generate an image from. */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `40` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * The strength of the initial image. Higher strength values are better for this model. Default value: `0.95` */ strength?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type DevReduxInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to generate an image from. */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. 
Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type DialogueBlock = { /** * The dialogue text. */ text: string; /** * The name or the ID of the voice to be used for the generation. */ voice: string; }; export type DiaOutput = { /** * The generated speech audio. */ audio: File; }; export type DiarizationSegment = { /** * Speaker ID of the segment. */ speaker: string; /** * Start and end timestamp of the segment. * NOTE(review): `Array` has no element type argument here - presumably `Array<number>`; confirm. */ timestamp: Array; }; export type DiaTtsInput = { /** * The text to be converted to speech. */ text: string; }; export type DictOutput = { /** * The value of the measurement. */ value?: unknown; }; export type DifferentialDiffusionInput = { /** * Base shift for the scheduled timesteps. Default value: `0.5` */ base_shift?: number; /** * URL of change map. */ change_map_image_url: string | Blob | File; /** * The LoRAs to use for the image generation which use a control image. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ control_loras?: Array; /** * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ controlnet_unions?: Array; /** * The controlnets to use for the image generation. Only one controlnet is supported at the moment. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ controlnets?: Array; /** * EasyControl Inputs to use for image generation. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ easycontrols?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Use an image input to influence the generation. Can be used to fill images in masked areas. 
*/ fill_image?: ImageFillInput; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to use as initial image. */ image_url: string | Blob | File; /** * IP-Adapter to use for image generation. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ ip_adapters?: Array; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ loras?: Array; /** * Max shift for the scheduled timesteps. Default value: `1.15` */ max_shift?: number; /** * The alpha value for NAG. This value is used as a final weighting * factor for steering the normalized guidance (positive and negative prompts) * in the direction of the positive prompt. Higher values will result in less * steering on the normalized guidance where lower values will result in * considering the positive prompt guidance more. Default value: `0.25` */ nag_alpha?: number; /** * The proportion of steps to apply NAG. After the specified proportion * of steps has been iterated, the remaining steps will use original * attention processors in FLUX. Default value: `0.25` */ nag_end?: number; /** * The scale for NAG. Higher values will result in an image that is more distant * from the negative prompt. Default value: `3` */ nag_scale?: number; /** * The tau for NAG. Controls the normalization of the hidden state. * Higher values will result in a less aggressive normalization, * but may also lead to unexpected changes with respect to the original image. * Not recommended to change this value. Default value: `2.5` */ nag_tau?: number; /** * Negative prompt to steer the image generation away from unwanted features. 
* By default, we will be using NAG for processing the negative prompt. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. This is always set to 1 for streaming output. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ real_cfg_scale?: number; /** * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ reference_end?: number; /** * URL of the image for reference-only generation. */ reference_image_url?: string | Blob | File; /** * The percentage of the total timesteps when the reference guidance is to be started. */ reference_start?: number; /** * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ reference_strength?: number; /** * Scheduler for the denoising process. Default value: `"euler"` */ scheduler?: "euler" | "dpmpp_2m"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Sigmas schedule for the denoising process. */ sigma_schedule?: string; /** * The strength to use for differential diffusion. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ strength?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Specifies whether beta sigmas ought to be used. */ use_beta_schedule?: boolean; /** * Uses CFG-zero init sampling as in https://arxiv.org/abs/2503.18886. 
*/ use_cfg_zero?: boolean; /** * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to true. * If using XLabs IP-Adapter v1, this will be turned on. */ use_real_cfg?: boolean; }; export type diffrhythmInput = { /** * The CFG strength to use for the music generation. Default value: `4` */ cfg_strength?: number; /** * The prompt to generate the song from. Must have two sections. Sections start with either [chorus] or [verse]. */ lyrics: string; /** * The duration of the music to generate. Default value: `"95s"` */ music_duration?: "95s" | "285s"; /** * The number of inference steps to use for the music generation. Default value: `32` */ num_inference_steps?: number; /** * The URL of the reference audio to use for the music generation. */ reference_audio_url?: string | Blob | File; /** * The scheduler to use for the music generation. Default value: `"euler"` */ scheduler?: "euler" | "midpoint" | "rk4" | "implicit_adams"; /** * The style prompt to use for the music generation. */ style_prompt?: string; }; export type DissolveInput = { /** * Dissolve blend factor. Default value: `0.5` */ dissolve_factor?: number; /** * URL of second image for dissolve. */ dissolve_image_url: string | Blob | File; /** * URL of image to process. */ image_url: string | Blob | File; }; export type DistilledExtendVideoInput = { /** * The aspect ratio of the video. Default value: `"auto"` */ aspect_ratio?: "9:16" | "1:1" | "16:9" | "auto"; /** * The constant rate factor (CRF) to compress input media with. Compressed input media more closely matches the model's training data, which can improve motion quality. Default value: `29` */ constant_rate_factor?: number; /** * Whether to use a detail pass. If true, the model will perform a second pass to refine the video and enhance details. This incurs a 2.0x cost multiplier on the base price. */ enable_detail_pass?: boolean; /** * Whether to enable the safety checker. 
Default value: `true` */ enable_safety_checker?: boolean; /** * Whether to expand the prompt using a language model. */ expand_prompt?: boolean; /** * Number of inference steps during the first pass. Default value: `8` */ first_pass_num_inference_steps?: number; /** * The frame rate of the video. Default value: `24` */ frame_rate?: number; /** * LoRA weights to use for generation. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ loras?: Array; /** * Negative prompt for generation. Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"` */ negative_prompt?: string; /** * The number of frames in the video. Default value: `121` */ num_frames?: number; /** * Text prompt to guide generation. */ prompt: string; /** * Resolution of the generated video. Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Whether to reverse the video. */ reverse_video?: boolean; /** * Number of inference steps during the second pass. Default value: `8` */ second_pass_num_inference_steps?: number; /** * The number of inference steps to skip in the initial steps of the second pass. By skipping some steps at the beginning, the second pass can focus on smaller details instead of larger changes. Default value: `5` */ second_pass_skip_initial_steps?: number; /** * Random seed for generation. */ seed?: number; /** * The factor for adaptive instance normalization (AdaIN) applied to generated video chunks after the first. This can help deal with a gradual increase in saturation/contrast in the generated video by normalizing the color distribution across the video. A high value will ensure the color distribution is more consistent across the video, while a low value will allow for more variation in color distribution. Default value: `0.5` */ temporal_adain_factor?: number; /** * The compression ratio for tone mapping. This is used to compress the dynamic range of the video to improve visual quality. A value of 0.0 means no compression, while a value of 1.0 means maximum compression. 
*/ tone_map_compression_ratio?: number; /** * Video to be extended. */ video: ExtendVideoConditioningInput; }; export type DistilledImageToVideoInput = { /** * The aspect ratio of the video. Default value: `"auto"` */ aspect_ratio?: "9:16" | "1:1" | "16:9" | "auto"; /** * The constant rate factor (CRF) to compress input media with. Compressed input media more closely matches the model's training data, which can improve motion quality. Default value: `29` */ constant_rate_factor?: number; /** * Whether to use a detail pass. If true, the model will perform a second pass to refine the video and enhance details. This incurs a 2.0x cost multiplier on the base price. */ enable_detail_pass?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * Whether to expand the prompt using a language model. */ expand_prompt?: boolean; /** * Number of inference steps during the first pass. Default value: `8` */ first_pass_num_inference_steps?: number; /** * The frame rate of the video. Default value: `24` */ frame_rate?: number; /** * Image URL for the image-to-video task. */ image_url: string | Blob | File; /** * LoRA weights to use for generation. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ loras?: Array; /** * Negative prompt for generation. Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"` */ negative_prompt?: string; /** * The number of frames in the video. Default value: `121` */ num_frames?: number; /** * Text prompt to guide generation. */ prompt: string; /** * Resolution of the generated video. Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Whether to reverse the video. */ reverse_video?: boolean; /** * Number of inference steps during the second pass. Default value: `8` */ second_pass_num_inference_steps?: number; /** * The number of inference steps to skip in the initial steps of the second pass. By skipping some steps at the beginning, the second pass can focus on smaller details instead of larger changes. 
Default value: `5` */ second_pass_skip_initial_steps?: number; /** * Random seed for generation. */ seed?: number; /** * The factor for adaptive instance normalization (AdaIN) applied to generated video chunks after the first. This can help deal with a gradual increase in saturation/contrast in the generated video by normalizing the color distribution across the video. A high value will ensure the color distribution is more consistent across the video, while a low value will allow for more variation in color distribution. Default value: `0.5` */ temporal_adain_factor?: number; /** * The compression ratio for tone mapping. This is used to compress the dynamic range of the video to improve visual quality. A value of 0.0 means no compression, while a value of 1.0 means maximum compression. */ tone_map_compression_ratio?: number; }; export type DistilledMultiConditioningVideoInput = { /** * The aspect ratio of the video. Default value: `"auto"` */ aspect_ratio?: "9:16" | "1:1" | "16:9" | "auto"; /** * The constant rate factor (CRF) to compress input media with. Compressed input media more closely matches the model's training data, which can improve motion quality. Default value: `29` */ constant_rate_factor?: number; /** * Whether to use a detail pass. If true, the model will perform a second pass to refine the video and enhance details. This incurs a 2.0x cost multiplier on the base price. */ enable_detail_pass?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * Whether to expand the prompt using a language model. */ expand_prompt?: boolean; /** * Number of inference steps during the first pass. Default value: `8` */ first_pass_num_inference_steps?: number; /** * The frame rate of the video. 
Default value: `24` */ frame_rate?: number; /** * Images to use as conditioning. * NOTE(review): `Array` has no element type argument here, and the upstream text says "URL of images" - confirm whether items are URLs or conditioning objects. */ images?: Array; /** * LoRA weights to use for generation. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ loras?: Array; /** * Negative prompt for generation. Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"` */ negative_prompt?: string; /** * The number of frames in the video. Default value: `121` */ num_frames?: number; /** * Text prompt to guide generation. */ prompt: string; /** * Resolution of the generated video. Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Whether to reverse the video. */ reverse_video?: boolean; /** * Number of inference steps during the second pass. Default value: `8` */ second_pass_num_inference_steps?: number; /** * The number of inference steps to skip in the initial steps of the second pass. By skipping some steps at the beginning, the second pass can focus on smaller details instead of larger changes. Default value: `5` */ second_pass_skip_initial_steps?: number; /** * Random seed for generation. */ seed?: number; /** * The factor for adaptive instance normalization (AdaIN) applied to generated video chunks after the first. This can help deal with a gradual increase in saturation/contrast in the generated video by normalizing the color distribution across the video. A high value will ensure the color distribution is more consistent across the video, while a low value will allow for more variation in color distribution. Default value: `0.5` */ temporal_adain_factor?: number; /** * The compression ratio for tone mapping. This is used to compress the dynamic range of the video to improve visual quality. A value of 0.0 means no compression, while a value of 1.0 means maximum compression. 
*/ tone_map_compression_ratio?: number; /** * Videos to use as conditioning. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ videos?: Array; }; export type docresInput = { /** * URL of image to be used for relighting. * NOTE(review): "relighting" may be copied from another endpoint; `task` below lists deshadowing/appearance/deblurring/binarization - confirm. */ image_url: string | Blob | File; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Task to perform. */ task: "deshadowing" | "appearance" | "deblurring" | "binarization"; }; export type DodgeBurnInput = { /** * Dodge and burn intensity. Default value: `0.5` */ dodge_burn_intensity?: number; /** * Dodge and burn mode. Default value: `"dodge"` */ dodge_burn_mode?: "dodge" | "burn" | "dodge_and_burn" | "burn_and_dodge" | "color_dodge" | "color_burn" | "linear_dodge" | "linear_burn"; /** * URL of image to process. */ image_url: string | Blob | File; }; export type DrctSuperResolutionInput = { /** * URL of the image to upscale. */ image_url: string | Blob | File; /** * Upscaling factor. Default value: `4` */ upscale_factor?: number; }; export type DreamActor2Input = { /** * The URL of the reference image to animate. Supports real people, animation, pets, etc. Format: jpeg, jpg or png. Max size: 4.7 MB. Resolution: between 480x480 and 1920x1080 (larger images will be proportionally reduced). */ image_url: string | Blob | File; /** * Whether to crop the first second of the output video. The output has a 1-second transition at the beginning; enable this to remove it. Default value: `true` */ trim_first_second?: boolean; /** * The URL of the driving template video providing motion, facial expressions, and lip movement reference. Max duration: 30 seconds. Format: mp4, mov or webm. Resolution: between 200x200 and 2048x1440. Supports full face and body driving. */ video_url: string | Blob | File; }; export type DreaminaInput = { /** * Whether to use an LLM to enhance the prompt. */ enhance_prompt?: boolean; /** * The size of the generated image. Width and height must be between 512 and 2048. 
*/ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The text prompt used to generate the image. */ prompt: string; /** * Random seed to control the stochasticity of image generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type dreamoInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * URL of first reference image to use for generation. */ first_image_url?: string | Blob | File; /** * Task for first reference image (ip/id/style). Default value: `"ip"` */ first_reference_task?: "ip" | "id" | "style"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The prompt to generate an image from. Default value: `""` * NOTE(review): this description duplicates `prompt`; presumably this field is the negative prompt - confirm. */ negative_prompt?: string; /** * The number of inference steps to perform. Default value: `12` */ num_inference_steps?: number; /** * The prompt to generate an image from. */ prompt: string; /** * Resolution for reference images. Default value: `512` */ ref_resolution?: number; /** * URL of second reference image to use for generation. */ second_image_url?: string | Blob | File; /** * Task for second reference image (ip/id/style). Default value: `"ip"` */ second_reference_task?: "ip" | "id" | "style"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. 
*/ seed?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; /** * The weight of the CFG loss. Default value: `1` */ true_cfg?: number; }; export type Dreamomni2EditInput = { /** * List of URLs of input images for editing. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ image_urls: Array; /** * The prompt to edit the image. */ prompt: string; }; export type DreamshaperImageToImageInput = { /** * The list of embeddings to use. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * Presumably the size of the generated image (description missing upstream) - confirm. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ loras?: Array; /** * The Dreamshaper model to use. */ model_name?: "Lykon/dreamshaper-xl-1-0" | "Lykon/dreamshaper-xl-v2-turbo" | "Lykon/dreamshaper-8"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. 
Default value: `25` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; export type DreamshaperInpaintingInput = { /** * The list of embeddings to use. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * Presumably the size of the generated image (description missing upstream) - confirm. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ loras?: Array; /** * The URL of the mask to use for inpainting. */ mask_url: string | Blob | File; /** * The Dreamshaper model to use. 
*/ model_name?: "Lykon/dreamshaper-xl-1-0" | "Lykon/dreamshaper-xl-v2-turbo" | "Lykon/dreamshaper-8"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; export type dreamshaperInput = { /** * The list of embeddings to use. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. 
Default value: `5` */ guidance_scale?: number; /** * Presumably the size of the generated image (description missing upstream) - confirm. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The list of LoRA weights to use. * NOTE(review): `Array` has no element type argument here - confirm the intended item type. */ loras?: Array; /** * The Dreamshaper model to use. */ model_name?: "Lykon/dreamshaper-xl-1-0" | "Lykon/dreamshaper-xl-v2-turbo" | "Lykon/dreamshaper-8"; /** * The negative prompt to use. Use it to address details that you don't want in the image. Default value: `"(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)"` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `35` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. 
*/
  seed?: number;
  /**
   * If set to true, the function will wait for the image to be generated and uploaded
   * before returning the response. This will increase the latency of the function but
   * it allows you to get the image directly in the response without going through the CDN.
   */
  sync_mode?: boolean;
};

export type DubbingAudioOutput = {
  /**
   * The dubbed audio file.
   */
  audio: File;
  /**
   * The target language of the dubbed content.
   */
  target_lang: string;
};

export type dubbingInput = {
  /**
   * Whether to lip sync the audio to the video. Default value: `true`
   */
  do_lipsync?: boolean;
  /**
   * Target language to dub the video to. Default value: `"hindi"`
   */
  target_language?: "hindi" | "turkish" | "english";
  /**
   * Input video URL to be dubbed.
   */
  video_url: string | Blob | File;
};

export type DubbingVideoOutput = {
  /**
   * The target language of the dubbed content.
   */
  target_lang: string;
  /**
   * The dubbed video file. Will be populated if video_url was provided in the request.
   */
  video: File;
};

export type dwposeInput = {
  /**
   * Mode of drawing the pose on the image. Options are: 'full-pose', 'body-pose', 'face-pose', 'hand-pose', 'face-hand-mask', 'face-mask', 'hand-mask'. Default value: `"body-pose"`
   */
  draw_mode?: "full-pose" | "body-pose" | "face-pose" | "hand-pose" | "face-hand-mask" | "face-mask" | "hand-mask";
  /**
   * URL of the image to be processed.
   */
  image_url: string | Blob | File;
};

export type DwposeVideoInput = {
  /**
   * Mode of drawing the pose on the video. Options are: 'full-pose', 'body-pose', 'face-pose', 'hand-pose', 'face-hand-mask', 'face-mask', 'hand-mask'.
Default value: `"body-pose"` */
  draw_mode?: "full-pose" | "body-pose" | "face-pose" | "hand-pose" | "face-hand-mask" | "face-mask" | "hand-mask";
  /**
   * URL of video to be used for pose estimation.
   */
  video_url: string | Blob | File;
};

export type DynamicMask = {
  /**
   * URL of the image for Dynamic Brush Application Area (Mask image created by users using the motion brush)
   */
  mask_url: string | Blob | File;
  /**
   * List of trajectories.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete trajectory type is confirmed.
   */
  trajectories?: Array<unknown>;
};

export type EasyControlWeight = {
  /**
   * URL to safetensor weights of control method to be applied. Can also be one of `canny`, `depth`, `hedsketch`, `inpainting`, `pose`, `seg`, `subject`, `ghibli`
   */
  control_method_url: string | Blob | File;
  /**
   * Control type of the image. Must be one of `spatial` or `subject`.
   */
  image_control_type: "subject" | "spatial";
  /**
   * URL of an image to use as a control.
   */
  image_url: string | Blob | File;
  /**
   * Scale for the control method. Default value: `1`
   */
  scale?: number;
};

export type EchomimicV3Input = {
  /**
   * The audio guidance scale to use for the video generation. Default value: `2.5`
   */
  audio_guidance_scale?: number;
  /**
   * The URL of the audio to use as a reference for the video generation.
   */
  audio_url: string | Blob | File;
  /**
   * The guidance scale to use for the video generation. Default value: `4.5`
   */
  guidance_scale?: number;
  /**
   * The URL of the image to use as a reference for the video generation.
   */
  image_url: string | Blob | File;
  /**
   * The negative prompt to use for the video generation. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of frames to generate at once. Default value: `121`
   */
  num_frames_per_generation?: number;
  /**
   * The prompt to use for the video generation.
   */
  prompt: string;
  /**
   * The seed to use for the video generation.
   */
  seed?: number;
};

export type EditImageInput = {
  /**
   * The acceleration level to use.
Default value: `"regular"` */
  acceleration?: "none" | "regular" | "high";
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The guidance scale to use for the image generation. Default value: `4.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. If None, uses the input image dimensions.
   */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /**
   * The URLs of the images to edit. Each entry follows the same
   * `string | Blob | File` convention as the other URL fields in this file.
   */
  image_urls: Array<string | Blob | File>;
  /**
   * The negative prompt to generate an image from. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt to edit the image with.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI.
   */
  sync_mode?: boolean;
};

export type EditImageLoraInput = {
  /**
   * The acceleration level to use. Default value: `"regular"`
   */
  acceleration?: "none" | "regular" | "high";
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The guidance scale to use for the image generation. Default value: `4.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. If None, uses the input image dimensions.
   */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /**
   * The URLs of the images to edit. Each entry follows the same
   * `string | Blob | File` convention as the other URL fields in this file.
   */
  image_urls: Array<string | Blob | File>;
  /**
   * The LoRAs to use for the image generation.
You can use up to 3 LoRAs and they will be merged together to generate the final image.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete LoRA weight type is confirmed.
   */
  loras?: Array<unknown>;
  /**
   * The negative prompt to generate an image from. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt to edit the image with.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI.
   */
  sync_mode?: boolean;
};

export type edittoInput = {
  /**
   * Acceleration to use for inference. Options are 'none', 'low' or 'regular'. Accelerated inference will very slightly affect output, but will be significantly faster. Default value: `"regular"`
   */
  acceleration?: "none" | "low" | "regular";
  /**
   * Aspect ratio of the generated video. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "16:9" | "1:1" | "9:16";
  /**
   * If true, the model will automatically temporally downsample the video to an appropriate frame length for the model, then will interpolate it back to the original frame length.
   */
  enable_auto_downsample?: boolean;
  /**
   * If set to true, the safety checker will be enabled.
   */
  enable_safety_checker?: boolean;
  /**
   * Frames per second of the generated video. Must be between 5 to 30. Ignored if match_input_frames_per_second is true. Default value: `16`
   */
  frames_per_second?: number;
  /**
   * Guidance scale for classifier-free guidance. Higher values encourage the model to generate images closely related to the text prompt. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * If true, the frames per second of the generated video will match the input video.
If false, the frames per second will be determined by the frames_per_second parameter.
   */
  match_input_frames_per_second?: boolean;
  /**
   * If true, the number of frames in the generated video will match the number of frames in the input video. If false, the number of frames will be determined by the num_frames parameter.
   */
  match_input_num_frames?: boolean;
  /**
   * Negative prompt for video generation. Default value: `"letterboxing, borders, black bars, bright colors, overexposed, static, blurred details, subtitles, style, artwork, painting, picture, still, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, malformed limbs, fused fingers, still picture, cluttered background, three legs, many people in the background, walking backwards"`
   */
  negative_prompt?: string;
  /**
   * Number of frames to generate. Must be between 81 to 241 (inclusive). Default value: `81`
   */
  num_frames?: number;
  /**
   * Number of inference steps for sampling. Higher values give better quality but take longer. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * Number of frames to interpolate between the original frames. A value of 0 means no interpolation.
   */
  num_interpolated_frames?: number;
  /**
   * The text prompt to guide video generation.
   */
  prompt: string;
  /**
   * Resolution of the generated video. Default value: `"auto"`
   */
  resolution?: "auto" | "240p" | "360p" | "480p" | "580p" | "720p";
  /**
   * If true, also return a ZIP file containing all generated frames.
   */
  return_frames_zip?: boolean;
  /**
   * Sampler to use for video generation. Default value: `"unipc"`
   */
  sampler?: "unipc" | "dpm++" | "euler";
  /**
   * Random seed for reproducibility. If None, a random seed is chosen.
   */
  seed?: number;
  /**
   * Shift parameter for video generation.
Default value: `5` */
  shift?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
  /**
   * Temporal downsample factor for the video. This is an integer value that determines how many frames to skip in the video. A value of 0 means no downsampling. For each downsample factor, one upsample factor will automatically be applied.
   */
  temporal_downsample_factor?: number;
  /**
   * The quality of the generated video. Default value: `"high"`
   */
  video_quality?: "low" | "medium" | "high" | "maximum";
  /**
   * URL to the source video file. Required for inpainting.
   */
  video_url: string | Blob | File;
  /**
   * The write mode of the generated video. Default value: `"balanced"`
   */
  video_write_mode?: "fast" | "balanced" | "small";
};

export type edittoOutput = {
  /**
   * ZIP archive of all video frames if requested.
   */
  frames_zip?: File;
  /**
   * The prompt used for generation.
   */
  prompt: string;
  /**
   * The seed used for generation.
   */
  seed: number;
  /**
   * The generated image to video file.
   */
  video: VideoFile;
};

export type EffectInput = {
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "8";
  /**
   * The effect to apply to the video.
   */
  effect:
    | "Kiss Me AI"
    | "Kiss"
    | "Muscle Surge"
    | "Warmth of Jesus"
    | "Anything, Robot"
    | "The Tiger Touch"
    | "Hug"
    | "Holy Wings"
    | "Microwave"
    | "Zombie Mode"
    | "Squid Game"
    | "Baby Face"
    | "Black Myth: Wukong"
    | "Long Hair Magic"
    | "Leggy Run"
    | "Fin-tastic Mermaid"
    | "Punch Face"
    | "Creepy Devil Smile"
    | "Thunder God"
    | "Eye Zoom Challenge"
    | "Who's Arrested?"
    | "Baby Arrived"
    | "Werewolf Rage"
    | "Bald Swipe"
    | "BOOM DROP"
    | "Huge Cutie"
    | "Liquid Metal"
    | "Sharksnap!"
| "Dust Me Away"
    | "3D Figurine Factor"
    | "Bikini Up"
    | "My Girlfriends"
    | "My Boyfriends"
    | "Subject 3 Fever"
    | "Earth Zoom"
    | "Pole Dance"
    | "Vroom Dance"
    | "GhostFace Terror"
    | "Dragon Evoker"
    | "Skeletal Bae"
    | "Summoning succubus"
    | "Halloween Voodoo Doll"
    | "3D Naked-Eye AD"
    | "Package Explosion"
    | "Dishes Served"
    | "Ocean ad"
    | "Supermarket AD"
    | "Tree doll"
    | "Come Feel My Abs"
    | "The Bicep Flex"
    | "London Elite Vibe"
    | "Flora Nymph Gown"
    | "Christmas Costume"
    | "It's Snowy"
    | "Reindeer Cruiser"
    | "Snow Globe Maker"
    | "Pet Christmas Outfit"
    | "Adopt a Polar Pal"
    | "Cat Christmas Box"
    | "Starlight Gift Box"
    | "Xmas Poster"
    | "Pet Christmas Tree"
    | "City Santa Hat"
    | "Stocking Sweetie"
    | "Christmas Night"
    | "Xmas Front Page Karma"
    | "Grinch's Xmas Hijack"
    | "Giant Product"
    | "Truck Fashion Shoot"
    | "Beach AD"
    | "Shoal Surround"
    | "Mechanical Assembly"
    | "Lighting AD"
    | "Billboard AD"
    | "Product close-up"
    | "Parachute Delivery"
    | "Dreamlike Cloud"
    | "Macaron Machine"
    | "Poster AD"
    | "Truck AD"
    | "Graffiti AD"
    | "3D Figurine Factory"
    | "The Exclusive First Class"
    | "Art Zoom Challenge"
    | "I Quit"
    | "Hitchcock Dolly Zoom"
    | "Smell the Lens"
    | "I believe I can fly"
    | "Strikout Dance"
    | "Pixel World"
    | "Mint in Box"
    | "Hands up, Hand"
    | "Flora Nymph Go"
    | "Somber Embrace"
    | "Beam me up"
    | "Suit Swagger";
  /**
   * URL of the image to use as the first frame.
   *
   * NOTE(review): the upstream description calls this "Optional" ("If not provided,
   * generates from text"), but the field is declared as required here. Confirm which
   * is intended before relying on either.
   */
  image_url: string | Blob | File;
  /**
   * Negative prompt to be used for the generation. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The resolution of the generated video. Default value: `"720p"`
   */
  resolution?: "360p" | "540p" | "720p" | "1080p";
};

export type ElementInput = {
  /**
   * The frontal image of the element (main view). Optional.
   */
  frontal_image_url?: string | Blob | File;
  /**
   * Additional reference images from different angles. 0-3 images supported. Optional.
*/
  reference_image_urls?: Array<string | Blob | File>;
};

export type ElevenlabsAudioIsolationInput = {
  /**
   * URL of the audio file to isolate voice from.
   */
  audio_url?: string | Blob | File;
  /**
   * Video file to use for audio isolation. Either `audio_url` or `video_url` must be provided.
   */
  video_url?: string | Blob | File;
};

export type ElevenlabsDubbingInput = {
  /**
   * URL of the audio file to dub. Either audio_url or video_url must be provided.
   */
  audio_url?: string | Blob | File;
  /**
   * Whether to use the highest resolution for dubbing. Default value: `true`
   */
  highest_resolution?: boolean;
  /**
   * Number of speakers in the audio. If not provided, will be auto-detected.
   */
  num_speakers?: number;
  /**
   * Source language code. If not provided, will be auto-detected.
   */
  source_lang?: string;
  /**
   * Target language code for dubbing (ISO 639-1)
   */
  target_lang: string;
  /**
   * URL of the video file to dub. Either audio_url or video_url must be provided. If both are provided, video_url takes priority.
   */
  video_url?: string | Blob | File;
};

export type ElevenlabsMusicInput = {
  /**
   * The composition plan for the music.
   */
  composition_plan?: MusicCompositionPlan;
  /**
   * If true, guarantees that the generated song will be instrumental. If false, the song may or may not be instrumental depending on the prompt. Can only be used with prompt.
   */
  force_instrumental?: boolean;
  /**
   * The length of the song to generate in milliseconds. Used only in conjunction with prompt. Must be between 3000ms and 600000ms. Optional - if not provided, the model will choose a length based on the prompt.
   */
  music_length_ms?: number;
  /**
   * Output format of the generated audio. Formatted as codec_sample_rate_bitrate. So an mp3 with 22.05kHz sample rate at 32kbps is represented as mp3_22050_32. MP3 with 192kbps bitrate requires you to be subscribed to Creator tier or above. PCM with 44.1kHz sample rate requires you to be subscribed to Pro tier or above.
Note that the μ-law format (sometimes written mu-law, often approximated as u-law) is commonly used for Twilio audio inputs. Default value: `"mp3_44100_128"`
   */
  output_format?:
    | "mp3_22050_32"
    | "mp3_44100_32"
    | "mp3_44100_64"
    | "mp3_44100_96"
    | "mp3_44100_128"
    | "mp3_44100_192"
    | "pcm_8000"
    | "pcm_16000"
    | "pcm_22050"
    | "pcm_24000"
    | "pcm_44100"
    | "pcm_48000"
    | "ulaw_8000"
    | "alaw_8000"
    | "opus_48000_32"
    | "opus_48000_64"
    | "opus_48000_96"
    | "opus_48000_128"
    | "opus_48000_192";
  /**
   * The text prompt describing the music to generate.
   */
  prompt?: string;
  /**
   * Controls how strictly section durations in the composition_plan are enforced. It will only have an effect if it is used with composition_plan. When set to true, the model will precisely respect each section's duration_ms from the plan. When set to false, the model may adjust individual section durations which will generally lead to better generation quality and improved latency, while always preserving the total song duration from the plan. Default value: `true`
   */
  respect_sections_durations?: boolean;
};

export type ElevenlabsSoundEffectsV2Input = {
  /**
   * Duration in seconds (0.5-22). If None, optimal duration will be determined from prompt.
   */
  duration_seconds?: number;
  /**
   * Whether to create a sound effect that loops smoothly.
   */
  loop?: boolean;
  /**
   * Output format of the generated audio. Formatted as codec_sample_rate_bitrate. Default value: `"mp3_44100_128"`
   */
  output_format?:
    | "mp3_22050_32"
    | "mp3_44100_32"
    | "mp3_44100_64"
    | "mp3_44100_96"
    | "mp3_44100_128"
    | "mp3_44100_192"
    | "pcm_8000"
    | "pcm_16000"
    | "pcm_22050"
    | "pcm_24000"
    | "pcm_44100"
    | "pcm_48000"
    | "ulaw_8000"
    | "alaw_8000"
    | "opus_48000_32"
    | "opus_48000_64"
    | "opus_48000_96"
    | "opus_48000_128"
    | "opus_48000_192";
  /**
   * How closely to follow the prompt (0-1). Higher values mean less variation.
Default value: `0.3` */
  prompt_influence?: number;
  /**
   * The text describing the sound effect to generate.
   */
  text: string;
};

export type ElevenlabsSpeechToTextInput = {
  /**
   * URL of the audio file to transcribe.
   */
  audio_url: string | Blob | File;
  /**
   * Whether to annotate who is speaking. Default value: `true`
   */
  diarize?: boolean;
  /**
   * Language code of the audio.
   */
  language_code?: string;
  /**
   * Tag audio events like laughter, applause, etc. Default value: `true`
   */
  tag_audio_events?: boolean;
};

export type ElevenlabsSpeechToTextScribeV2Input = {
  /**
   * URL of the audio file to transcribe.
   */
  audio_url: string | Blob | File;
  /**
   * Whether to annotate who is speaking. Default value: `true`
   */
  diarize?: boolean;
  /**
   * Words or sentences to bias the model towards transcribing. Up to 100 keyterms, max 50 characters each. Adds 30% premium over base transcription price.
   */
  keyterms?: Array<string>;
  /**
   * Language code of the audio.
   */
  language_code?: string;
  /**
   * Tag audio events like laughter, applause, etc. Default value: `true`
   */
  tag_audio_events?: boolean;
};

export type ElevenlabsTextToDialogueElevenV3Input = {
  /**
   * A list of dialogue inputs, each containing text and a voice ID which will be converted into speech.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete dialogue-item type is confirmed.
   */
  inputs: Array<unknown>;
  /**
   * Language code (ISO 639-1) used to enforce a language for the model. An error will be returned if language code is not supported by the model.
   */
  language_code?: string;
  /**
   * A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete locator type is confirmed.
   */
  pronunciation_dictionary_locators?: Array<unknown>;
  /**
   * Random seed for reproducibility.
   */
  seed?: number;
  /**
   * Determines how stable the voice is and the randomness between each generation. Lower values introduce broader emotional range for the voice. Higher values can result in a monotonous voice with limited emotion. Must be one of 0.0, 0.5, 1.0, else it will be rounded to the nearest value.
*/
  stability?: number;
  /**
   * This setting boosts the similarity to the original speaker. Using this setting requires a slightly higher computational load, which in turn increases latency.
   */
  use_speaker_boost?: boolean;
};

export type ElevenlabsTtsElevenV3Input = {
  /**
   * This parameter controls text normalization with three modes: 'auto', 'on', and 'off'. When set to 'auto', the system will automatically decide whether to apply text normalization (e.g., spelling out numbers). With 'on', text normalization will always be applied, while with 'off', it will be skipped. Default value: `"auto"`
   */
  apply_text_normalization?: "auto" | "on" | "off";
  /**
   * Language code (ISO 639-1) used to enforce a language for the model.
   */
  language_code?: string;
  /**
   * Voice stability (0-1). Default value: `0.5`
   */
  stability?: number;
  /**
   * The text to convert to speech.
   */
  text: string;
  /**
   * Whether to return timestamps for each word in the generated speech.
   */
  timestamps?: boolean;
  /**
   * The voice to use for speech generation. Default value: `"Rachel"`
   */
  voice?: string;
};

export type ElevenlabsTtsElevenV3Output = {
  /**
   * The generated audio file.
   */
  audio: File;
  /**
   * Timestamps for each word in the generated speech. Only returned if `timestamps` is set to True in the request.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete timestamp type is confirmed.
   */
  timestamps?: Array<unknown>;
};

export type ElevenlabsTtsTurboV25Input = {
  /**
   * This parameter controls text normalization with three modes: 'auto', 'on', and 'off'. When set to 'auto', the system will automatically decide whether to apply text normalization (e.g., spelling out numbers). With 'on', text normalization will always be applied, while with 'off', it will be skipped. Default value: `"auto"`
   */
  apply_text_normalization?: "auto" | "on" | "off";
  /**
   * Language code (ISO 639-1) used to enforce a language for the model. An error will be returned if language code is not supported by the model.
   */
  language_code?: string;
  /**
   * The text that comes after the text of the current request.
Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
   */
  next_text?: string;
  /**
   * The text that came before the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
   */
  previous_text?: string;
  /**
   * Similarity boost (0-1). Default value: `0.75`
   */
  similarity_boost?: number;
  /**
   * Speech speed (0.7-1.2). Values below 1.0 slow down the speech, above 1.0 speed it up. Extreme values may affect quality. Default value: `1`
   */
  speed?: number;
  /**
   * Voice stability (0-1). Default value: `0.5`
   */
  stability?: number;
  /**
   * Style exaggeration (0-1)
   */
  style?: number;
  /**
   * The text to convert to speech.
   */
  text: string;
  /**
   * Whether to return timestamps for each word in the generated speech.
   */
  timestamps?: boolean;
  /**
   * The voice to use for speech generation. Default value: `"Rachel"`
   */
  voice?: string;
};

export type ElevenlabsVoiceChangerInput = {
  /**
   * The input audio file.
   */
  audio_url: string | Blob | File;
  /**
   * Output format of the generated audio. Formatted as codec_sample_rate_bitrate. Default value: `"mp3_44100_128"`
   */
  output_format?:
    | "mp3_22050_32"
    | "mp3_44100_32"
    | "mp3_44100_64"
    | "mp3_44100_96"
    | "mp3_44100_128"
    | "mp3_44100_192"
    | "pcm_8000"
    | "pcm_16000"
    | "pcm_22050"
    | "pcm_24000"
    | "pcm_44100"
    | "pcm_48000"
    | "ulaw_8000"
    | "alaw_8000"
    | "opus_48000_32"
    | "opus_48000_64"
    | "opus_48000_96"
    | "opus_48000_128"
    | "opus_48000_192";
  /**
   * If set, will remove the background noise from your audio input using our audio isolation model.
   */
  remove_background_noise?: boolean;
  /**
   * Random seed for reproducibility.
   */
  seed?: number;
  /**
   * The voice to use for speech generation. Default value: `"Rachel"`
   */
  voice?: string;
};

export type Embedding = {
  /**
   * URL or the path to the embedding weights.
*/
  path: string;
  /**
   * The list of tokens to use for the embedding.
   */
  tokens?: Array<string>;
};

export type EmbedItem = {
  /**
   * Coordinates for this item.
   * NOTE(review): the original description is empty and `Coordinates` is declared
   * elsewhere; confirm the exact meaning (presumably the placement of the image).
   */
  coordinates: Coordinates;
  /**
   * URL of the image.
   */
  image_source?: string;
};

export type EmbedProductInput = {
  /**
   * URL of the image.
   */
  image_source?: string;
  /**
   * List of products to embed in the image.
   */
  products?: Array<EmbedItem>;
  /**
   * Random seed for reproducibility. Default value: `5555`
   */
  seed?: number;
  /**
   * If true, returns the image directly in the response (increases latency).
   */
  sync_mode?: boolean;
};

export type EmotionalStrengths = {
  /**
   * Strength of fear emotion.
   */
  afraid?: number;
  /**
   * Strength of anger emotion.
   */
  angry?: number;
  /**
   * Strength of calm emotion.
   */
  calm?: number;
  /**
   * Strength of disgust emotion.
   */
  disgusted?: number;
  /**
   * Strength of happiness emotion.
   */
  happy?: number;
  /**
   * Strength of melancholic emotion.
   */
  melancholic?: number;
  /**
   * Strength of sadness emotion.
   */
  sad?: number;
  /**
   * Strength of surprise emotion.
   */
  surprised?: number;
};

export type Emu35ImageEditInput = {
  /**
   * The aspect ratio of the output image. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "21:9" | "16:9" | "4:3" | "3:2" | "1:1" | "2:3" | "3:4" | "9:16" | "9:21";
  /**
   * Whether to enable the safety checker. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The image to edit.
   */
  image_url: string | Blob | File;
  /**
   * The format of the output image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt to edit the image.
   */
  prompt: string;
  /**
   * The resolution of the output image. Default value: `"720p"`
   */
  resolution?: "480p" | "720p";
  /**
   * The seed for the inference.
   */
  seed?: number;
  /**
   * Whether to return the image in sync mode.
   */
  sync_mode?: boolean;
};

export type Emu35ImageInput = {
  /**
   * The aspect ratio of the output image.
Default value: `"1:1"` */
  aspect_ratio?: "21:9" | "16:9" | "4:3" | "3:2" | "1:1" | "2:3" | "3:4" | "9:16" | "9:21";
  /**
   * Whether to enable the safety checker. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The format of the output image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt to create the image.
   */
  prompt: string;
  /**
   * The resolution of the output image. Default value: `"720p"`
   */
  resolution?: "480p" | "720p";
  /**
   * The seed for the inference.
   */
  seed?: number;
  /**
   * Whether to return the image in sync mode.
   */
  sync_mode?: boolean;
};

export type Emu35Output = {
  /**
   * The edited image.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete image type is confirmed.
   */
  images: Array<unknown>;
  /**
   * The seed for the inference.
   */
  seed: number;
};

export type EQBand = {
  /**
   * Center frequency in Hz (20-20000)
   */
  frequency: number;
  /**
   * Gain in dB (-30 to 30, negative = cut, positive = boost)
   */
  gain: number;
  /**
   * Bandwidth in Hz. Default value: `100`
   */
  width?: number;
};

export type Era3dInput = {
  /**
   * Background removal. Default value: `true`
   */
  background_removal?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `4`
   */
  cfg?: number;
  /**
   * Size of the image to crop to. Default value: `400`
   */
  crop_size?: number;
  /**
   * URL of the image to remove background from.
   */
  image_url: string | Blob | File;
  /**
   * Seed for random number generation. Default value: `-1`
   */
  seed?: number;
  /**
   * Number of steps to run the model for. Default value: `40`
   */
  steps?: number;
};

export type Era3dOutput = {
  /**
   * Images with background removed.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete image type is confirmed.
   */
  images: Array<unknown>;
  /**
   * Normal images with background removed.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete image type is confirmed.
   */
  normal_images: Array<unknown>;
  /**
   * Seed used for random number generation.
   */
  seed: number;
};

export type EraseByTextInput = {
  /**
   * The source image.
   */
  image_url: string | Blob | File;
  /**
   * The name of the object to remove.
*/
  object_name: string;
};

export type EraserInput = {
  /**
   * Input Image to erase from.
   */
  image_url: string | Blob | File;
  /**
   * You can use this parameter to specify the type of the input mask from the list. The 'manual' option should be used in cases in which the mask had been generated by a user (e.g. with a brush tool), and the 'automatic' mask type should be used when the mask had been generated by an algorithm like 'SAM'. Default value: `"manual"`
   */
  mask_type?: "manual" | "automatic";
  /**
   * The URL of the binary mask image that represents the area that will be cleaned.
   */
  mask_url: string | Blob | File;
  /**
   * If set to true, attempts to preserve the alpha channel of the input image.
   */
  preserve_alpha?: boolean;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

export type esrganInput = {
  /**
   * Upscaling a face.
   */
  face?: boolean;
  /**
   * Url to input image.
   */
  image_url: string | Blob | File;
  /**
   * Model to use for upscaling. Default value: `"RealESRGAN_x4plus"`
   */
  model?:
    | "RealESRGAN_x4plus"
    | "RealESRGAN_x2plus"
    | "RealESRGAN_x4plus_anime_6B"
    | "RealESRGAN_x4_v3"
    | "RealESRGAN_x4_wdn_v3"
    | "RealESRGAN_x4_anime_v3";
  /**
   * Output image format (png or jpeg). Default value: `"png"`
   */
  output_format?: "png" | "jpeg";
  /**
   * Rescaling factor. Default value: `2`
   */
  scale?: number;
  /**
   * Tile size. Default is 0, that is no tile. When encountering the out-of-GPU-memory issue, please specify it, e.g., 400 or 200
   */
  tile?: number;
};

export type EvfSamInput = {
  /**
   * Apply Gaussian blur to the mask.
Value determines kernel size (must be odd number)
   */
  blur_mask?: number;
  /**
   * Expand/dilate the mask by specified pixels.
   */
  expand_mask?: number;
  /**
   * Fill holes in the mask using morphological operations.
   */
  fill_holes?: boolean;
  /**
   * URL of the input image.
   */
  image_url: string | Blob | File;
  /**
   * Output only the binary mask instead of masked image. Default value: `true`
   */
  mask_only?: boolean;
  /**
   * Areas to exclude from segmentation (will be subtracted from prompt results)
   */
  negative_prompt?: string;
  /**
   * The prompt to generate segmentation from.
   */
  prompt: string;
  /**
   * Invert the mask (background becomes foreground and vice versa)
   */
  revert_mask?: boolean;
  /**
   * Enable semantic level segmentation for body parts, background or multi objects.
   */
  semantic_type?: boolean;
  /**
   * Use GroundingDINO instead of SAM for segmentation.
   */
  use_grounding_dino?: boolean;
};

export type EvfSamOutput = {
  /**
   * The segmented output image.
   */
  image: File;
};

export type ExtendInput = {
  /**
   * The URL of the audio file to alter. Must be a valid publicly accessible URL.
   */
  audio_url: string | Blob | File;
  /**
   * Greater means more natural vocals. Lower means sharper instrumentals. We recommend 0.7. Default value: `0.7`
   */
  balance_strength?: number;
  /**
   * Duration in seconds to crop from the selected side before extending from that side.
   */
  crop_duration?: number;
  /**
   * Duration in seconds to extend the song. If not provided, will attempt to automatically determine.
   */
  extend_duration?: number;
  /**
   * The lyrics sung in the generated song. An empty string will generate an instrumental track.
   */
  lyrics_prompt?: string;
  /**
   * Generating 2 songs costs 1.5x the price of generating 1 song. Also, note that using the same seed may not result in identical songs if the number of songs generated is changed. Default value: `1`
   */
  num_songs?: number;
  /**
   * The bit rate to use for mp3 and m4a formats. Not available for other formats.
*/
  output_bit_rate?: "128" | "192" | "256" | "320";
  /**
   * Default value: `"wav"`
   */
  output_format?: "flac" | "mp3" | "wav" | "ogg" | "m4a";
  /**
   * A description of the track you want to generate. This prompt will be used to automatically generate the tags and lyrics unless you manually set them. For example, if you set prompt and tags, then the prompt will be used to generate only the lyrics.
   */
  prompt?: string;
  /**
   * Controls how strongly your prompt influences the output. Greater values adhere more to the prompt but sound less natural. (This is CFG.) Default value: `1.8`
   */
  prompt_strength?: number;
  /**
   * The seed to use for generation. Will pick a random seed if not provided. Repeating a request with identical parameters (must use lyrics and tags, not prompt) and the same seed will generate the same song.
   */
  seed?: number;
  /**
   * Add more to the beginning (left) or end (right) of the song.
   */
  side: "left" | "right";
  /**
   * Tags/styles of the music to generate. You can view a list of all available tags at https://sonauto.ai/tag-explorer.
   */
  tags?: Array<string>;
};

export type ExtendOutput = {
  /**
   * The generated audio files.
   *
   * NOTE(review): the element type is not specified in this declaration; typed as
   * `unknown` until the concrete audio file type is confirmed.
   */
  audio: Array<unknown>;
  /**
   * The duration in seconds that the song was extended by.
   */
  extend_duration: number;
  /**
   * The lyrics used for generation.
   */
  lyrics?: string;
  /**
   * The seed used for generation. This can be used to generate an identical song by passing the same parameters with this seed in a future request.
   */
  seed: number;
  /**
   * The style tags used for generation.
   */
  tags?: Array<string>;
};

export type ExtendVideoConditioningInput = {
  /**
   * Whether to limit the number of frames used from the video. If True, the `max_num_frames` parameter will be used to limit the number of frames.
   */
  limit_num_frames?: boolean;
  /**
   * Maximum number of frames to use from the video. If None, all frames will be used. Default value: `1441`
   */
  max_num_frames?: number;
  /**
   * Whether to resample the video to a specific FPS.
If True, the `target_fps` parameter will be used to resample the video.
   */
  resample_fps?: boolean;
  /**
   * Whether to reverse the video. This is useful for tasks where the video conditioning should be applied in reverse order.
   */
  reverse_video?: boolean;
  /**
   * Frame number of the video from which the conditioning starts. Must be a multiple of 8.
   */
  start_frame_num?: number;
  /**
   * Strength of the conditioning. 0.0 means no conditioning, 1.0 means full conditioning. Default value: `1`
   */
  strength?: number;
  /**
   * Target FPS to resample the video to. Only relevant if `resample_fps` is True. Default value: `24`
   */
  target_fps?: number;
  /**
   * URL of video to use as conditioning.
   */
  video_url: string | Blob | File;
};

export type ExtendVideoInput = {
  /**
   * Aspect ratio of the generated video (16:9 or 9:16). Default value: `"16:9"`
   */
  aspect_ratio?: "9:16" | "16:9";
  /**
   * Whether to expand the prompt using the model's own capabilities. Default value: `true`
   */
  expand_prompt?: boolean;
  /**
   * Negative prompt for generation. Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"`
   */
  negative_prompt?: string;
  /**
   * Number of inference steps. Default value: `40`
   */
  num_inference_steps?: number;
  /**
   * Text prompt to guide generation.
   */
  prompt: string;
  /**
   * Resolution of the generated video (480p or 720p). Default value: `"720p"`
   */
  resolution?: "480p" | "720p";
  /**
   * Random seed for generation.
   */
  seed?: number;
  /**
   * Video to be extended.
   */
  video: VideoConditioningInput;
};

export type ExtendVideoOutput = {
  /**
   * The prompt used for generation.
   */
  prompt: string;
  /**
   * The seed used for generation.
   */
  seed: number;
  /**
   * The generated video file.
*/ video: File; }; /** Input for extracting the audio track from a video file. */ export type ExtractAudioInput = { /** * Audio bitrate. Default value: `"192k"` */ audio_bitrate?: "128k" | "192k" | "256k" | "320k"; /** * Output audio format. Default value: `"mp3"` */ audio_format?: "mp3" | "wav" | "aac" | "flac"; /** * URL of the video file to extract audio from. */ video_url: string | Blob | File; }; /** Input for extracting every Nth frame from a video file. */ export type ExtractNthFrameInput = { /** * Extract every Nth frame (e.g., 3 = every 3rd frame, 12 = every 12th frame). Default value: `12` */ frame_interval?: number; /** * Maximum number of frames to extract. Default value: `100` */ max_frames?: number; /** * Output format for extracted frames. Default value: `"png"` */ output_format?: "png" | "jpg" | "jpeg" | "webp"; /** * Quality for jpg/webp output (1-100). Default value: `95` */ quality?: number; /** * URL of the video file to extract frames from. */ video_url: string | Blob | File; }; /** Extracted frames and their count. */ export type ExtractNthFrameOutput = { /** * Total number of frames extracted. */ frame_count: number; /** * Array of extracted frame images. NOTE(review): element type assumed to be this module's `File` descriptor; a narrower image type may apply — confirm against the endpoint schema. */ images: Array<File>; }; /** Input for generating speech from text using a reference audio clip. */ export type F5TtsInput = { /** * The text to be converted to speech. Maximum 5000 characters. */ gen_text: string; /** * The name of the model to be used for TTS. */ model_type: "F5-TTS" | "E2-TTS"; /** * The URL of the reference audio file. */ ref_audio_url: string | Blob | File; /** * The reference text to be used for TTS. If not provided, an ASR (Automatic Speech Recognition) model will be used to generate the reference text. Default value: `""` */ ref_text?: string; /** * Whether to remove the silence from the audio file. Default value: `true` */ remove_silence?: boolean; }; /** Output carrying the generated speech audio. */ export type F5TtsOutput = { /** * The audio file containing the generated speech.
*/ audio_url: AudioFile; }; /** Input made of an audio source, an image source and a resolution. */ export type Fabric10Input = { /** * Audio input. NOTE(review): no description is provided upstream; confirm the expected content and formats. */ audio_url: string | Blob | File; /** * Image input. NOTE(review): no description is provided upstream; confirm the expected content and formats. */ image_url: string | Blob | File; /** * Resolution. */ resolution: "720p" | "480p"; }; /** Input made of an image source, a text, a resolution and an optional voice description. */ export type Fabric10TextInput = { /** * Image input. NOTE(review): no description is provided upstream; the `voice_description` docs indicate a voice description is auto-generated from this image. */ image_url: string | Blob | File; /** * Resolution. */ resolution: "720p" | "480p"; /** * Text input. NOTE(review): no description is provided upstream; presumably the text to be spoken — confirm. */ text: string; /** * Optional additional voice description. The primary voice description is auto-generated from the image. You can use simple descriptors like 'British accent' or 'Confident' or provide a detailed description like 'Confident male voice, mid-20s, with notes of...' */ voice_description?: string; }; /** Audio selection for one face in a video: source audio, crop window, insert time and volume settings. */ export type FaceChoice = { /** * Publicly accessible URL to the audio file. Supported formats: .mp3, .wav, .m4a (max 5MB). Duration must be between 2–60 seconds. */ audio_url: string | Blob | File; /** * ID of the face in the video. Returned by the `identify_face` API. */ face_id: string; /** * Volume multiplier for the video's original audio track. Range: [0, 2]. Has no effect if the source video contains no audio. Default value: `1` */ original_audio_volume?: number; /** * End time (ms) for cropping the source audio. Must be greater than `sound_start_time` and within the original audio duration. The cropped segment must be at least 2 seconds long. */ sound_end_time: number; /** * Time (ms) at which the cropped audio will be inserted into the video. Must meet both of the following conditions: * 1. `sound_insert_time` must be within the duration of the video (it cannot be greater than the total video length). * 2. The cropped audio segment must fully fit within the video when inserted — meaning `sound_insert_time + cropped_sound_length` must not exceed the video's total duration. * * In other words: the insert point must be inside the video, and the inserted audio must not extend past the end of the video. */ sound_insert_time: number; /** * Start time (ms) for cropping the source audio. Must be between 0 and the audio duration.
The cropped audio must remain at least 2 seconds long. */ sound_start_time: number; /** * Volume multiplier for the inserted audio. Range: [0, 2], where 1 = original volume. Default value: `1` */ sound_volume?: number; }; /** A face captured from a video, with the time range in which it can be used for lip-sync. */ export type FaceData = { /** * End of the time range (milliseconds) in which this face can be used for lip-sync. Note: This value has a millisecond level error and will be longer than the actual ending time. */ end_time: number; /** * ID of the face in the video. When the same person's face is separated by more than 1 second in the video, it will be considered as different IDs. */ face_id: string; /** * A schematic diagram of a face captured from a video (URL). */ face_image: string; /** * Start of the time range (milliseconds) in which this face can be used for lip-sync. */ start_time: number; }; /** Input for face-to-sticker image generation. */ export type FaceToStickerInput = { /** * If set to false, the safety checker will be disabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of the video. NOTE(review): the field is named `image_url`, so "video" here is presumably a copy-paste slip for "image" — confirm. */ image_url: string | Blob | File; /** * The strength of the instant ID. Default value: `0.7` */ instant_id_strength?: number; /** * The amount of noise to add to the IP adapter. Default value: `0.5` */ ip_adapter_noise?: number; /** * The weight of the IP adapter. Default value: `0.2` */ ip_adapter_weight?: number; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution).
Default value: `""` */ negative_prompt?: string; /** * Increasing the amount of steps tells Stable Diffusion that it should take more steps * to generate your final result which can increase the amount of detail in your image. Default value: `20` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Whether to upscale the image 2x. */ upscale?: boolean; /** * The number of steps to use for upscaling. Only used if `upscale` is `true`. Default value: `10` */ upscale_steps?: number; }; /** Output with the generated images, the face sticker (with and without background) and the seed used. */ export type FaceToStickerOutput = { /** * Whether the generated images contain NSFW concepts. * The key is the image type and the value is a boolean. */ has_nsfw_concepts: unknown; /** * The generated images. */ images: Array<Image>; /** * Seed used during the inference. */ seed: number; /** * The generated face sticker image. */ sticker_image: Image; /** * The generated face sticker image with the background removed. */ sticker_image_background_removed: Image; }; /** Input for prompt-based image generation with optional embeddings, refiner, prompt expansion and safety checker. */ export type FastFooocusSdxlInput = { /** * The list of embeddings to use. NOTE(review): the element type is not recoverable from this declaration; typed as `unknown` until the embedding schema is confirmed. */ embeddings?: Array<unknown>; /** * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true` */ enable_refiner?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. Default value: `true` */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The rescale factor for the CFG.
*/ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `8` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; }; /** Input for generating a 6- or 10-second video from an image and a prompt. */ export type FastImageToVideoHailuo02Input = { /** * The duration of the video in seconds. 10 seconds videos are not supported for 1080p resolution. Default value: `"6"` */ duration?: "6" | "10"; /** * Image input. NOTE(review): no description is provided upstream; presumably the source image for the video — confirm. */ image_url: string | Blob | File; /** * Text prompt. NOTE(review): no description is provided upstream; confirm how it is used. */ prompt: string; /** * Whether to use the model's prompt optimizer. Default value: `true` */ prompt_optimizer?: boolean; }; /** Input for prompt-based image generation with a selectable number of inference steps (1, 2, 4 or 8). */ export type FastLightningSdxlInput = { /** * The list of embeddings to use. NOTE(review): the element type is not recoverable from this declaration; typed as `unknown` until the embedding schema is confirmed. */ embeddings?: Array<unknown>; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts.
*/ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `"4"` */ num_inference_steps?: "1" | "2" | "4" | "8"; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** Output with the generated images, their NSFW flags, the seed used and timings. */ export type FastSdxlControlnetCannyOutput = { /** * Whether the generated images contain NSFW concepts. */ has_nsfw_concepts: Array<boolean>; /** * The generated image files info. NOTE(review): element type assumed to be this module's `File` descriptor; a narrower image type may apply — confirm against the endpoint schema. */ images: Array<File>; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** * Timing information. NOTE(review): no description is provided upstream and the shape is untyped. */ timings: unknown; }; /** Input for prompt-based image generation with optional embeddings and LoRA weights. */ export type FastSdxlInput = { /** * The list of embeddings to use. NOTE(review): the element type is not recoverable from this declaration; typed as `unknown` until the embedding schema is confirmed. */ embeddings?: Array<unknown>; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image.
Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The list of LoRA weights to use. NOTE(review): the element type is not recoverable from this declaration; typed as `unknown` until the LoRA weight schema is confirmed. */ loras?: Array<unknown>; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** Input for generating a 25-frame video from a starting image. */ export type FastSvdLcmInput = { /** * The conditioning augmentation determines the amount of noise that will be * added to the conditioning frame. The higher the number, the more noise * there will be, and the less the video will look like the initial image. * Increase it for more motion.
Default value: `0.02` */ cond_aug?: number; /** * The FPS of the generated video. The higher the number, the faster the video will * play. Total video length is 25 frames. Default value: `10` */ fps?: number; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The motion bucket id determines the motion of the generated video. The * higher the number, the more motion there will be. Default value: `127` */ motion_bucket_id?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The number of steps to run the model for. The higher the number the better * the quality and longer it will take to generate. Default value: `4` */ steps?: number; }; /** Input for generating a 25-frame video from a text prompt. */ export type FastSVDTextInput = { /** * The conditioning augmentation determines the amount of noise that will be * added to the conditioning frame. The higher the number, the more noise * there will be, and the less the video will look like the initial image. * Increase it for more motion. Default value: `0.02` */ cond_aug?: number; /** * The FPS of the generated video. The higher the number, the faster the video will * play. Total video length is 25 frames. Default value: `10` */ fps?: number; /** * The motion bucket id determines the motion of the generated video. The * higher the number, the more motion there will be. Default value: `127` */ motion_bucket_id?: number; /** * The prompt to use as a starting point for the generation. */ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The number of steps to run the model for. The higher the number the better * the quality and longer it will take to generate. Default value: `4` */ steps?: number; /** * The size of the generated video.
Default value: `landscape_16_9` */ video_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; }; /** Input for generating a 25-frame video from a prompt and an optional negative prompt. */ export type FastSvdTextToVideoInput = { /** * The conditioning augmentation determines the amount of noise that will be * added to the conditioning frame. The higher the number, the more noise * there will be, and the less the video will look like the initial image. * Increase it for more motion. Default value: `0.02` */ cond_aug?: number; /** * The FPS of the generated video. The higher the number, the faster the video will * play. Total video length is 25 frames. Default value: `10` */ fps?: number; /** * The motion bucket id determines the motion of the generated video. The * higher the number, the more motion there will be. Default value: `127` */ motion_bucket_id?: number; /** * The negative prompt to use as a starting point for the generation. Default value: `"unrealistic, saturated, high contrast, big nose, painting, drawing, sketch, cartoon, anime, manga, render, CG, 3d, watermark, signature, label"` */ negative_prompt?: string; /** * The prompt to use as a starting point for the generation. */ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The number of steps to run the model for. The higher the number the better * the quality and longer it will take to generate. Default value: `20` */ steps?: number; /** * The size of the generated video. Default value: `landscape_16_9` */ video_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; }; /** Input for image generation from a prompt, a structured prompt and/or a reference image. */ export type FiboBbqPreviewGenerateInput = { /** * Aspect ratio. Options: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9. Default value: `"1:1"` */ aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9"; /** * Guidance scale for text.
Default value: `5` */ guidance_scale?: number; /** * Reference image (file or URL). */ image_url?: string | Blob | File; /** * Negative prompt for image generation. Default value: `""` */ negative_prompt?: string; /** * Prompt for image generation. */ prompt?: string; /** * Random seed for reproducibility. Default value: `5555` */ seed?: number; /** * Number of inference steps. Default value: `50` */ steps_num?: number; /** * The structured prompt to generate an image from. NOTE(review): the `| unknown` member widens this whole union to `unknown`, so the field is effectively unchecked. */ structured_prompt?: StructuredPrompt | string | unknown; /** * If true, returns the image directly in the response (increases latency). */ sync_mode?: boolean; }; /** Input for editing an image from an instruction, with an optional mask and structured instructions. */ export type FiboEditEditInput = { /** * Guidance scale for text. Default value: `5` */ guidance_scale?: number; /** * Reference image (file or URL). */ image_url?: string | Blob | File; /** * Instruction for image editing. */ instruction?: string; /** * Mask image (file or URL). Optional. */ mask_url?: string | Blob | File; /** * Negative prompt for image generation. Default value: `""` */ negative_prompt?: string; /** * The new vgl describing image after edit. */ new_vgl?: StructuredInstruction; /** * The original vgl used to generate the image. */ original_vgl?: StructuredInstruction; /** * Random seed for reproducibility. Default value: `5555` */ seed?: number; /** * Number of inference steps. Default value: `30` */ steps_num?: number; /** * The structured prompt to generate an image from. */ structured_instruction?: StructuredInstruction; /** * If true, returns the image directly in the response (increases latency). */ sync_mode?: boolean; }; /** Output of an image edit: the generated image(s) and the current structured instruction. */ export type FiboEditEditOutput = { /** * Generated image. */ image: Image; /** * Generated images. */ images?: Array<Image>; /** * Current instruction. */ structured_instruction: unknown; }; /** Input with a reference image, an optional mask and an editing instruction. */ export type FiboEditEditStructuredInstructionInput = { /** * Reference image (file or URL). */ image_url?: string | Blob | File; /** * Instruction for image editing.
*/ instruction?: string; /** * Reference image mask (file or URL). Optional. */ mask_url?: string | Blob | File; /** * Random seed for reproducibility. Default value: `5555` */ seed?: number; /** * If true, returns the image directly in the response (increases latency). */ sync_mode?: boolean; }; /** Input for image generation from a prompt or a structured prompt, with an optional input image. */ export type FiboFastImageGenerationInput = { /** * Aspect ratio. Options: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9. Default value: `"1:1"` */ aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9"; /** * Input image URL. */ image_url?: string | Blob | File; /** * Negative prompt for image generation. Default value: `""` */ negative_prompt?: string; /** * The prompt to generate. */ prompt?: string; /** * Seed for the random number generator. Default value: `7` */ seed?: number; /** * Number of inference steps. Default value: `8` */ steps_num?: number; /** * The structured prompt to generate. */ structured_prompt?: StructuredPrompt; /** * If true, returns the image directly in the response (increases latency). */ sync_mode?: boolean; }; /** Output with the generated image(s) and the current structured prompt. */ export type FiboFastImageGenerationOutput = { /** * Generated image. */ image: Image; /** * Generated images. */ images?: Array<Image>; /** * Current prompt. */ structured_prompt: StructuredPrompt; }; /** Input for image generation from a prompt, a structured prompt and/or a reference image, at 1MP or 4MP. */ export type FiboGenerateInput = { /** * Aspect ratio. Options: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9. Default value: `"1:1"` */ aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9"; /** * Reference image (file or URL). */ image_url?: string | Blob | File; /** * Negative prompt for image generation. Default value: `""` */ negative_prompt?: string; /** * Prompt for image generation. */ prompt?: string; /** * Output image resolution. Default value: `"1MP"` */ resolution?: "1MP" | "4MP"; /** * Random seed for reproducibility. Default value: `5555` */ seed?: number; /** * Number of inference steps.
Default value: `50` */ steps_num?: number; /** * The structured prompt to generate an image from. */ structured_prompt?: StructuredPrompt; /** * If true, returns the image directly in the response (increases latency). */ sync_mode?: boolean; }; /** Output with the generated image(s) and the current structured prompt. */ export type FiboGenerateOutput = { /** * Generated image. */ image: Image; /** * Generated images. */ images?: Array<Image>; /** * Current prompt. */ structured_prompt: unknown; }; /** Input with a prompt, a structured prompt and/or a reference image, plus an optional seed. */ export type FiboLiteGenerateStructuredPromptLiteInput = { /** * Reference image (file or URL). */ image_url?: string | Blob | File; /** * Prompt for image generation. */ prompt?: string; /** * Random seed for reproducibility. Default value: `5555` */ seed?: number; /** * The structured prompt to generate an image from. */ structured_prompt?: BriaFiboVlmStructuredprompt; }; /** Descriptor of a file: its download URL plus optional name, size, MIME type and data. */ export type File = { /** * The mime type of the file. */ content_type?: string; /** * File data. */ file_data?: string; /** * The name of the file. It will be auto-generated if not provided. */ file_name?: string; /** * The size of the file in bytes. */ file_size?: number; /** * The URL where the file can be downloaded from. */ url: string; }; /** Input for interpolating frames between a start image and an end image. */ export type filmInput = { /** * The URL of the second image to use as the ending point for interpolation. */ end_image_url: string | Blob | File; /** * Frames per second for the output video. Only applicable if output_type is 'video'. Default value: `8` */ fps?: number; /** * The format of the output images. Only applicable if output_type is 'images'. Default value: `"jpeg"` */ image_format?: "png" | "jpeg"; /** * Whether to include the end image in the output. */ include_end?: boolean; /** * Whether to include the start image in the output. */ include_start?: boolean; /** * The number of frames to generate between the input images. Default value: `1` */ num_frames?: number; /** * The type of output to generate; either individual images or a video.
Default value: `"images"` */ output_type?: "images" | "video"; /** * The URL of the first image to use as the starting point for interpolation. */ start_image_url: string | Blob | File; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The quality of the output video. Only applicable if output_type is 'video'. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the output video. Only applicable if output_type is 'video'. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; /** Interpolation output: the generated frames as images and/or the generated video. */ export type filmOutput = { /** * The generated frames as individual images. NOTE(review): element type assumed to be this module's `File` descriptor; a narrower image type may apply — confirm against the endpoint schema. */ images?: Array<File>; /** * The generated video file, if output_type is 'video'. */ video?: VideoFile; }; /** Input for interpolating additional frames between the frames of a video. */ export type FilmVideoInput = { /** * Frames per second for the output video. Only applicable if use_calculated_fps is False. Default value: `8` */ fps?: number; /** * If True, the final frame will be looped back to the first frame to create a seamless loop. If False, the final frame will not loop back. */ loop?: boolean; /** * The number of frames to generate between the input video frames. Default value: `1` */ num_frames?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * If True, the function will use the calculated FPS of the input video multiplied by the number of frames to determine the output FPS. If False, the passed FPS will be used. Default value: `true` */ use_calculated_fps?: boolean; /** * If True, the input video will be split into scenes before interpolation. This removes smear frames between scenes, but can result in false positives if the scene detection is not accurate. If False, the entire video will be treated as a single scene. */ use_scene_detection?: boolean; /** * The quality of the output video.
Only applicable if output_type is 'video'. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The URL of the video to use for interpolation. */ video_url: string | Blob | File; /** * The write mode of the output video. Only applicable if output_type is 'video'. NOTE(review): this type declares no `output_type` field, so the condition looks carried over from the image-interpolation input — confirm. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; /** Input for erasing image content selected by a bounding box. */ export type FinegrainEraserBboxInput = { /** * List of bounding box coordinates to erase (only one box prompt is supported). NOTE(review): the element type is not recoverable from this declaration; typed as `unknown` until the box prompt schema is confirmed. */ box_prompts: Array<unknown>; /** * URL of the image to edit. */ image_url: string | Blob | File; /** * Erase quality mode. Default value: `"standard"` */ mode?: "express" | "standard" | "premium"; /** * Random seed for reproducible generation. */ seed?: number; }; /** Input for erasing image content described by a text prompt. */ export type FinegrainEraserInput = { /** * URL of the image to edit. */ image_url: string | Blob | File; /** * Erase quality mode. Default value: `"standard"` */ mode?: "express" | "standard" | "premium"; /** * Text description of what to erase. */ prompt: string; /** * Random seed for reproducible generation. */ seed?: number; }; /** Input for erasing image content selected by a mask image. */ export type FinegrainEraserMaskInput = { /** * URL of the image to edit. */ image_url: string | Blob | File; /** * URL of the mask image. Should be a binary mask where white (255) indicates areas to erase. */ mask_url: string | Blob | File; /** * Erase quality mode. Default value: `"standard"` */ mode?: "express" | "standard" | "premium"; /** * Random seed for reproducible generation. */ seed?: number; }; /** Output with the edited image and the seed used. */ export type FinegrainEraserOutput = { /** * The edited image with content erased. */ image: File; /** * Seed used for generation. */ used_seed: number; }; /** Input for upscaling a video. */ export type FlashvsrUpscaleVideoInput = { /** * Acceleration mode for VAE decoding. Options: regular (best quality), high (balanced), full (fastest). More acceleration means longer duration videos can be processed too. Default value: `"regular"` */ acceleration?: "regular" | "high" | "full"; /** * Color correction enabled.
Default value: `true` */ color_fix?: boolean; /** * The format of the output video. Default value: `"X264 (.mp4)"` */ output_format?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the output video. Default value: `"high"` */ output_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the output video. Default value: `"balanced"` */ output_write_mode?: "fast" | "balanced" | "small"; /** * Copy the original audio tracks into the upscaled video using FFmpeg when possible. */ preserve_audio?: boolean; /** * Quality level for tile blending (0-100). Controls overlap between tiles to prevent grid artifacts. Higher values provide better quality with more overlap. Recommended: 70-85 for high-res videos, 50-70 for faster processing. Default value: `70` */ quality?: number; /** * The random seed used for the generation process. */ seed?: number; /** * If `True`, the media will be returned inline and not stored in history. */ sync_mode?: boolean; /** * Upscaling factor to be used. Default value: `2` */ upscale_factor?: number; /** * The input video to be upscaled. */ video_url: string | Blob | File; }; /** Input for prompt-based image generation. */ export type FLiteTextureInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Negative prompt for generation. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform.
Default value: `28` */ num_inference_steps?: number; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** Output holding a single numeric measurement. */ export type FloatOutput = { /** * The value of the measurement. */ value: number; }; /** Output with OCR bounding-box results and an optional processed image. */ export type Florence2LargeOcrWithRegionOutput = { /** * Processed image. */ image?: Image; /** * Results from the model. */ results: OCRBoundingBox; }; /** Output with bounding-box results and an optional processed image. */ export type Florence2LargeRegionProposalOutput = { /** * Processed image. */ image?: Image; /** * Results from the model. */ results: BoundingBoxes; }; /** Output with polygon results and an optional processed image. */ export type Florence2LargeRegionToSegmentationOutput = { /** * Processed image. */ image?: Image; /** * Results from the model. */ results: PolygonOutput; }; /** Input for prompt-guided image editing with a source prompt and a target prompt. */ export type floweditInput = { /** * URL of image to be used for relighting. NOTE(review): "relighting" is not mentioned by any other field of this type; confirm the wording. */ image_url: string | Blob | File; /** * Average step count. Default value: `1` */ n_avg?: number; /** * Control the strength of the edit. Default value: `23` */ n_max?: number; /** * Minimum step for improved style edits. */ n_min?: number; /** * Steps for which the model should run. Default value: `28` */ num_inference_steps?: number; /** * Random seed for reproducible generation. If set to none, a random seed will be used. */ seed?: number; /** * Prompt of the image to be used. */ source_prompt: string; /** * Guidance scale for the source. Default value: `1.5` */ src_guidance_scale?: number; /** * Guidance scale for target. Default value: `5.5` */ tar_guidance_scale?: number; /** * Prompt of the image to be made. */ target_prompt: string; }; /** Input for editing up to 4 images from a prompt. */ export type Flux2EditInput = { /** * The acceleration level to use for the image generation.
Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the prompt will be expanded for better results. */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Guidance Scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The size of the image to generate. The width and height must be between 512 and 2048 pixels. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URLs of the images for editing. A maximum of 4 images are allowed, if more are provided, only the first 4 will be used. Each entry uses the same `string | Blob | File` form as the single-URL fields in this module. */ image_urls: Array<string | Blob | File>; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to edit the image. */ prompt: string; /** * The seed to use for the generation. If not provided, a random seed will be used. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** Input for editing up to 4 images from a prompt. */ export type Flux2FlashEditInput = { /** * If set to true, the prompt will be expanded for better results. */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Guidance Scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The size of the image to generate.
The width and height must be between 512 and 2048 pixels. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URLs of the images for editing. A maximum of 4 images are allowed, if more are provided, only the first 4 will be used. */ image_urls: Array<string | Blob | File>; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to edit the image. */ prompt: string; /** * The seed to use for the generation. If not provided, a random seed will be used. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for generating images from a text prompt. */ export type Flux2FlashInput = { /** * If set to true, the prompt will be expanded for better results. */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Guidance Scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The size of the image to generate. The width and height must be between 512 and 2048 pixels. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to generate an image from. */ prompt: string; /** * The seed to use for the generation. If not provided, a random seed will be used. 
*/ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for prompt-based editing of input images, with a safety tolerance setting. */ export type Flux2FlexEditInput = { /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The guidance scale to use for the generation. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. If `auto`, the size will be determined by the model. Default value: `auto` */ image_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * List of URLs of input images for editing */ image_urls: Array<string | Blob | File>; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5"; /** * The seed to use for the generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for generating an image from a text prompt, with a safety tolerance setting. */ export type Flux2FlexInput = { /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The guidance scale to use for the generation. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. 
Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5"; /** * The seed to use for the generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for generating images from a text prompt. */ export type Flux2Input = { /** * The acceleration level to use for the image generation. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the prompt will be expanded for better results. */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Guidance Scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The size of the image to generate. The width and height must be between 512 and 2048 pixels. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to generate an image from. */ prompt: string; /** * The seed to use for the generation. If not provided, a random seed will be used. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; }; /** * Input for prompt-based editing of one or more source images. */ export type Flux2Klein4bEditInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. If not provided, uses the input image size. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URLs of the images for editing. A maximum of 4 images are allowed. */ image_urls: Array<string | Blob | File>; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `4` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to edit the image. */ prompt: string; /** * The seed to use for the generation. If not provided, a random seed will be used. */ seed?: number; /** * If `true`, the media will be returned as a data URI. Output is not stored when this is `true`. */ sync_mode?: boolean; }; /** * Input for prompt-based editing of one or more source images, with optional LoRA weights. */ export type Flux2Klein4bEditLoraInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. If not provided, uses the input image size. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URLs of the images for editing. A maximum of 4 images are allowed. */ image_urls: Array<string | Blob | File>; /** * List of LoRA weights to apply (maximum 3). * NOTE(review): `Array` is missing its element type argument (the LoRA weight type is not visible here) — confirm and restore. */ loras?: Array; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `4` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to edit the image. */ prompt: string; /** * The seed to use for the generation. 
If not provided, a random seed will be used. */ seed?: number; /** * If `true`, the media will be returned as a data URI. Output is not stored when this is `true`. */ sync_mode?: boolean; }; /** * Input for prompt-based editing of one or more source images, with optional LoRA weights. */ export type Flux2LoraEditInput = { /** * The acceleration level to use for the image generation. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the prompt will be expanded for better results. */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Guidance Scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The size of the image to generate. The width and height must be between 512 and 2048 pixels. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URLs of the images for editing. A maximum of 3 images are allowed, if more are provided, only the first 3 will be used. */ image_urls: Array<string | Blob | File>; /** * List of LoRA weights to apply (maximum 3). Each LoRA can be a URL, HuggingFace repo ID, or local path. * NOTE(review): `Array` is missing its element type argument (the LoRA weight type is not visible here) — confirm and restore. */ loras?: Array; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to generate an image from. */ prompt: string; /** * The seed to use for the generation. If not provided, a random seed will be used. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for adding a prompt-described background to the supplied images. */ export type Flux2LoraGalleryAddBackgroundInput = { /** * Acceleration level for image generation. 
'regular' balances speed and quality. Default value: `"regular"` */ acceleration?: "none" | "regular"; /** * Whether to enable the safety checker for the generated image. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale. Controls how closely the model follows the prompt. Default value: `2.5` */ guidance_scale?: number; /** * The size of the generated image. If not provided, the size of the input image will be used. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URLs of the images. Provide an image with a white or clean background. */ image_urls: Array<string | Blob | File>; /** * The strength of the add background effect. Default value: `1` */ lora_scale?: number; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `40` */ num_inference_steps?: number; /** * The format of the output image. Default value: `"png"` */ output_format?: "png" | "jpeg" | "webp"; /** * The prompt describing the background to add. Must start with 'Add Background' followed by your description. Default value: `"Add Background forest"` */ prompt?: string; /** * Random seed for reproducibility. Same seed with same prompt will produce same result. */ seed?: number; /** * If `true`, the media will be returned as a data URI and won't be saved in history. */ sync_mode?: boolean; }; /** * Input for re-rendering an image from a different camera angle and zoom level. */ export type Flux2LoraGalleryMultipleAnglesInput = { /** * Acceleration level for image generation. Default value: `"regular"` */ acceleration?: "none" | "regular"; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale. Default value: `2.5` */ guidance_scale?: number; /** * Horizontal rotation angle around the object in degrees. 0°=front view, 90°=right side, 180°=back view, 270°=left side, 360°=front view again. 
*/ horizontal_angle?: number; /** * The size of the generated image. If not provided, the size of the input image will be used. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to adjust camera angle for. */ image_urls: Array<string | Blob | File>; /** * The strength of the multiple angles effect. Default value: `1` */ lora_scale?: number; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `40` */ num_inference_steps?: number; /** * The format of the output image. Default value: `"png"` */ output_format?: "png" | "jpeg" | "webp"; /** * Random seed for reproducibility. */ seed?: number; /** * If `true`, the media will be returned as a data URI. */ sync_mode?: boolean; /** * Vertical camera angle in degrees. 0°=eye-level shot, 30°=elevated shot, 60°=high-angle shot (looking down from above). */ vertical_angle?: number; /** * Camera zoom/distance. 0=wide shot (far away), 5=medium shot (normal), 10=close-up (very close). Default value: `5` */ zoom?: number; }; /** * Input for generating images from a text prompt, with optional LoRA weights. */ export type Flux2LoraInput = { /** * The acceleration level to use for the image generation. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the prompt will be expanded for better results. */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Guidance Scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The size of the image to generate. The width and height must be between 512 and 2048 pixels. 
Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * List of LoRA weights to apply (maximum 3). Each LoRA can be a URL, HuggingFace repo ID, or local path. * NOTE(review): `Array` is missing its element type argument (the LoRA weight type is not visible here) — confirm and restore. */ loras?: Array; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to generate an image from. */ prompt: string; /** * The seed to use for the generation. If not provided, a random seed will be used. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for prompt-based editing of input images, with a safety tolerance setting. */ export type Flux2MaxEditInput = { /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. If `auto`, the size will be determined by the model. Default value: `auto` */ image_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * List of URLs of input images for editing */ image_urls: Array<string | Blob | File>; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5"; /** * The seed to use for the generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for generating an image from a text prompt, with a safety tolerance setting. */ export type Flux2MaxInput = { /** * Whether to enable the safety checker. 
Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5"; /** * The seed to use for the generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Output carrying the generated images together with their NSFW flags, the prompt, the seed and timings. */ export type Flux2Output = { /** * Whether the generated images contain NSFW concepts. */ has_nsfw_concepts: Array<boolean>; /** * The generated images * NOTE(review): `Array` is missing its element type argument (the image type is not visible here) — confirm and restore. */ images: Array; /** * The prompt used for generating the image. */ prompt: string; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** * Timings payload; typed as `unknown`, so narrow it before use. */ timings: unknown; }; /** * Input for a training run over a zip archive of images. */ export type Flux2TrainerInput = { /** * Default caption to use when caption files are missing. If None, missing captions will cause an error. */ default_caption?: string; /** * URL to zip archive with images of a consistent style. Try to use at least 10 images, although more is better. * * The zip can also contain a text file for each image. The text file should be named: * ROOT.txt * For example: * photo.txt * * This text file can be used to specify the edit instructions for the image pair. * * If no text file is provided, the default_caption will be used. * * If no default_caption is provided, the training will fail. */ image_data_url: string | Blob | File; /** * Learning rate applied to trainable parameters. 
Default value: `0.00005` */ learning_rate?: number; /** * Dictates the naming scheme for the output weights. Default value: `"fal"` */ output_lora_format?: "fal" | "comfy"; /** * Total number of training steps. Default value: `1000` */ steps?: number; }; /** * Output with the trained LoRA weights file and its configuration file. */ export type Flux2TrainerOutput = { /** * URL to the configuration file for the trained model. */ config_file: File; /** * URL to the trained diffusers lora weights. */ diffusers_lora_file: File; }; /** * Input for image-to-image generation guided by a control-LoRA image. */ export type FluxControlLoraCannyImageToImageInput = { /** * The image to use for control lora. This is used to control the style of the generated image. */ control_lora_image_url: string | Blob | File; /** * The strength of the control lora. Default value: `1` */ control_lora_strength?: number; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of the image to use for inpainting or img2img. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): `Array` is missing its element type argument (the LoRA weight type is not visible here) — confirm and restore. */ loras?: Array; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. 
*/ seed?: number; /** * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ strength?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for generating images from a text prompt. */ export type FluxDevInput = { /** * The speed of the generation. The higher the speed, the faster the generation. Default value: `"none"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for image-to-image generation from an initial image and a change map. */ export type FluxDifferentialDiffusionInput = { /** * URL of change map. */ change_map_image_url: string | Blob | File; /** * If set to true, the safety checker will be enabled. 
Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * URL of image to use as initial image. */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * The strength to use for image-to-image. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ strength?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input for image-to-image generation with optional LoRA, ControlNet, EasyControl, IP-Adapter, reference-image and NAG settings. */ export type FluxGeneralImageToImageInput = { /** * Base shift for the scheduled timesteps. Default value: `0.5` */ base_shift?: number; /** * The LoRAs to use for the image generation which use a control image. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): `Array` is missing its element type argument here and on the other bare `Array` fields of this type — confirm the element types against the API schema. */ control_loras?: Array; /** * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. */ controlnet_unions?: Array; /** * The controlnets to use for the image generation. Only one controlnet is supported at the moment. */ controlnets?: Array; /** * EasyControl Inputs to use for image generation. */ easycontrols?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Use an image input to influence the generation. Can be used to fill images in masked areas. 
*/ fill_image?: ImageFillInput; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of the image to use for inpainting or img2img. */ image_url: string | Blob | File; /** * IP-Adapter to use for image generation. * NOTE(review): `Array` is missing its element type argument here and on the other bare `Array` fields of this type — confirm the element types against the API schema. */ ip_adapters?: Array; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. */ loras?: Array; /** * Max shift for the scheduled timesteps. Default value: `1.15` */ max_shift?: number; /** * The alpha value for NAG. This value is used as a final weighting * factor for steering the normalized guidance (positive and negative prompts) * in the direction of the positive prompt. Higher values will result in less * steering on the normalized guidance where lower values will result in * considering the positive prompt guidance more. Default value: `0.25` */ nag_alpha?: number; /** * The proportion of steps to apply NAG. After the specified proportion * of steps has been iterated, the remaining steps will use original * attention processors in FLUX. Default value: `0.25` */ nag_end?: number; /** * The scale for NAG. Higher values will result in an image that is more distant * to the negative prompt. Default value: `3` */ nag_scale?: number; /** * The tau for NAG. Controls the normalization of the hidden state. * Higher values will result in a less aggressive normalization, * but may also lead to unexpected changes with respect to the original image. * Not recommended to change this value. Default value: `2.5` */ nag_tau?: number; /** * Negative prompt to steer the image generation away from unwanted features. 
* By default, we will be using NAG for processing the negative prompt. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. This is always set to 1 for streaming output. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ real_cfg_scale?: number; /** * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ reference_end?: number; /** * URL of Image for Reference-Only */ reference_image_url?: string | Blob | File; /** * The percentage of the total timesteps when the reference guidance is to be started. */ reference_start?: number; /** * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ reference_strength?: number; /** * Scheduler for the denoising process. Default value: `"euler"` */ scheduler?: "euler" | "dpmpp_2m"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Sigmas schedule for the denoising process. */ sigma_schedule?: string; /** * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ strength?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Specifies whether beta sigmas ought to be used. 
*/ use_beta_schedule?: boolean; /** * Uses CFG-zero init sampling as in https://arxiv.org/abs/2503.18886. */ use_cfg_zero?: boolean; /** * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. * If using XLabs IP-Adapter v1, this will be turned on. */ use_real_cfg?: boolean; }; /** * Input for mask-based inpainting with optional LoRA, ControlNet, EasyControl, IP-Adapter, reference-image and NAG settings. */ export type FluxGeneralInpaintingInput = { /** * Base shift for the scheduled timesteps. Default value: `0.5` */ base_shift?: number; /** * The LoRAs to use for the image generation which use a control image. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): `Array` is missing its element type argument here and on the other bare `Array` fields of this type — confirm the element types against the API schema. */ control_loras?: Array; /** * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. */ controlnet_unions?: Array; /** * The controlnets to use for the image generation. Only one controlnet is supported at the moment. */ controlnets?: Array; /** * EasyControl Inputs to use for image generation. */ easycontrols?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Use an image input to influence the generation. Can be used to fill images in masked areas. */ fill_image?: ImageFillInput; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of the image to use for inpainting or img2img. */ image_url: string | Blob | File; /** * IP-Adapter to use for image generation. */ ip_adapters?: Array; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. */ loras?: Array; /** * The mask to area to Inpaint in. 
*/ mask_url: string | Blob | File; /** * Max shift for the scheduled timesteps. Default value: `1.15` */ max_shift?: number; /** * The alpha value for NAG. This value is used as a final weighting * factor for steering the normalized guidance (positive and negative prompts) * in the direction of the positive prompt. Higher values will result in less * steering on the normalized guidance where lower values will result in * considering the positive prompt guidance more. Default value: `0.25` */ nag_alpha?: number; /** * The proportion of steps to apply NAG. After the specified proportion * of steps has been iterated, the remaining steps will use original * attention processors in FLUX. Default value: `0.25` */ nag_end?: number; /** * The scale for NAG. Higher values will result in an image that is more distant * to the negative prompt. Default value: `3` */ nag_scale?: number; /** * The tau for NAG. Controls the normalization of the hidden state. * Higher values will result in a less aggressive normalization, * but may also lead to unexpected changes with respect to the original image. * Not recommended to change this value. Default value: `2.5` */ nag_tau?: number; /** * Negative prompt to steer the image generation away from unwanted features. * By default, we will be using NAG for processing the negative prompt. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. This is always set to 1 for streaming output. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. 
Default value: `3.5` */ real_cfg_scale?: number; /** * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ reference_end?: number; /** * URL of Image for Reference-Only */ reference_image_url?: string | Blob | File; /** * The percentage of the total timesteps when the reference guidance is to be started. */ reference_start?: number; /** * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ reference_strength?: number; /** * Scheduler for the denoising process. Default value: `"euler"` */ scheduler?: "euler" | "dpmpp_2m"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Sigmas schedule for the denoising process. */ sigma_schedule?: string; /** * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ strength?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Specifies whether beta sigmas ought to be used. */ use_beta_schedule?: boolean; /** * Uses CFG-zero init sampling as in https://arxiv.org/abs/2503.18886. */ use_cfg_zero?: boolean; /** * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. * If using XLabs IP-Adapter v1, this will be turned on. */ use_real_cfg?: boolean; }; /** * Input for prompt-driven image generation with optional LoRA, ControlNet, EasyControl, IP-Adapter, reference-image and NAG settings. */ export type FluxGeneralInput = { /** * Base shift for the scheduled timesteps. Default value: `0.5` */ base_shift?: number; /** * The LoRAs to use for the image generation which use a control image. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): `Array` is missing its element type argument here and on the other bare `Array` fields of this type — confirm the element types against the API schema. */ control_loras?: Array; /** * The controlnet unions to use for the image generation. 
Only one controlnet is supported at the moment. */ controlnet_unions?: Array; /** * The controlnets to use for the image generation. Only one controlnet is supported at the moment. */ controlnets?: Array; /** * EasyControl Inputs to use for image generation. */ easycontrols?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Use an image input to influence the generation. Can be used to fill images in masked areas. */ fill_image?: ImageFillInput; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * IP-Adapter to use for image generation. */ ip_adapters?: Array; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. */ loras?: Array; /** * Max shift for the scheduled timesteps Default value: `1.15` */ max_shift?: number; /** * The alpha value for NAG. This value is used as a final weighting * factor for steering the normalized guidance (positive and negative prompts) * in the direction of the positive prompt. Higher values will result in less * steering on the normalized guidance where lower values will result in * considering the positive prompt guidance more. Default value: `0.25` */ nag_alpha?: number; /** * The proportion of steps to apply NAG. After the specified proportion * of steps has been iterated, the remaining steps will use original * attention processors in FLUX. Default value: `0.25` */ nag_end?: number; /** * The scale for NAG. Higher values will result in a image that is more distant * to the negative prompt. 
Default value: `3` */ nag_scale?: number; /** * The tau for NAG. Controls the normalization of the hidden state. * Higher values will result in a less aggressive normalization, * but may also lead to unexpected changes with respect to the original image. * Not recommended to change this value. Default value: `2.5` */ nag_tau?: number; /** * Negative prompt to steer the image generation away from unwanted features. * By default, we will be using NAG for processing the negative prompt. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. This is always set to 1 for streaming output. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ real_cfg_scale?: number; /** * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ reference_end?: number; /** * URL of Image for Reference-Only */ reference_image_url?: string | Blob | File; /** * The percentage of the total timesteps when the reference guidance is to bestarted. */ reference_start?: number; /** * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ reference_strength?: number; /** * Scheduler for the denoising process. Default value: `"euler"` */ scheduler?: "euler" | "dpmpp_2m"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Sigmas schedule for the denoising process. 
*/ sigma_schedule?: string; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Specifies whether beta sigmas ought to be used. */ use_beta_schedule?: boolean; /** * Uses CFG-zero init sampling as in https://arxiv.org/abs/2503.18886. */ use_cfg_zero?: boolean; /** * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. * If using XLabs IP-Adapter v1, this will be turned on!. */ use_real_cfg?: boolean; }; export type FluxKontextDevInput = { /** * The speed of the generation. The higher the speed, the faster the generation. Default value: `"none"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The URL of the image to edit. */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * Output format Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to edit the image. */ prompt: string; /** * Determines how the output resolution is set for image editing. * - `auto`: The model selects an optimal resolution from a predefined set that best matches the input image's aspect ratio. This is the recommended setting for most use cases as it's what the model was trained on. * - `match_input`: The model will attempt to use the same resolution as the input image. 
The resolution will be adjusted to be compatible with the model's requirements (e.g. dimensions must be multiples of 16 and within supported limits). * Apart from these, a few aspect ratios are also supported. Default value: `"match_input"` */ resolution_mode?: "auto" | "match_input" | "1:1" | "16:9" | "21:9" | "3:2" | "2:3" | "4:5" | "5:4" | "3:4" | "4:3" | "9:16" | "9:21"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for generating an image from a text prompt plus a single image prompt (`image_url`). */ export type FluxKontextInput = { /** * The aspect ratio of the generated image. */ aspect_ratio?: "21:9" | "16:9" | "4:3" | "3:2" | "1:1" | "2:3" | "3:4" | "9:16" | "9:21"; /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * Image prompt for the omni model. */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/ sync_mode?: boolean; }; /** * Input parameters for generating an image from a text prompt plus several image prompts (`image_urls`). */ export type FluxKontextMultiInput = { /** * The aspect ratio of the generated image. */ aspect_ratio?: "21:9" | "16:9" | "4:3" | "3:2" | "1:1" | "2:3" | "3:4" | "9:16" | "9:21"; /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * Image prompts for the omni model. Each entry takes the same forms as a single `image_url` elsewhere in this file (URL string, Blob or File). */ image_urls: Array<string | Blob | File>; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Result of a generation request: the generated images, their NSFW flags, and the prompt and seed that were used. */ export type FluxKontextOutput = { /** * Whether the generated images contain NSFW concepts. NOTE(review): presumably one flag per generated image; confirm. */ has_nsfw_concepts: Array<boolean>; /** * The generated image files info. NOTE(review): the element type is not declared in this part of the file; `unknown` is a placeholder until the concrete image record type is confirmed. */ images: Array<unknown>; /** * The prompt used for generating the image. */ prompt: string; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** * Timing data for the request. NOTE(review): the shape is not declared here, hence `unknown`. */ timings: unknown; }; /** * Input parameters for LoRA-based inpainting / image-to-image on a source image (`image_url`) with a mask (`mask_url`). */ export type FluxKreaLoraInpaintingInput = { /** * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"none"` */ acceleration?: "none" | "regular"; /** * If set to true, the safety checker will be enabled.
Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to use for inpainting or img2img. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): the element type is not declared in this part of the file; `unknown` is a placeholder until the concrete LoRA type is confirmed. */ loras?: Array<unknown>; /** * The mask of the area to inpaint. */ mask_url: string | Blob | File; /** * The number of images to generate. This is always set to 1 for streaming output. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for training a LoRA from a zip archive of images (`images_data_url`). */ export type FluxKreaTrainerInput = { /** * If True, segmentation masks will be used to weight the training loss. For people a face mask is used if possible. Default value: `true` */ create_masks?: boolean; /** * The format of the archive. If not specified, the format will be inferred from the URL.
*/ data_archive_format?: string; /** * URL to zip archive with images. Try to use at least 4 images; in general, the more the better. * * In addition to images the archive can contain text files with captions. Each text file should have the same name as the image file it corresponds to. */ images_data_url: string | Blob | File; /** * Specifies whether the input data is already in a processed format. When set to False (default), the system expects raw input where image files and their corresponding caption files share the same name (e.g., 'photo.jpg' and 'photo.txt'). Set to True if your data is already in a preprocessed format. */ is_input_format_already_preprocessed?: boolean; /** * If True, the training will be for a style. This will deactivate segmentation, captioning and will use trigger word instead. Use the trigger word to specify the style. */ is_style?: boolean; /** * Number of steps to train the LoRA on. */ steps?: number; /** * Trigger word to be used in the captions. If None, a trigger word will not be used. * If no captions are provided, the trigger_word will be used instead of captions. If captions are provided, the trigger word will not be used. */ trigger_word?: string; }; /** * Files produced by a training run: the training configuration, the trained diffusers LoRA weights and, optionally, the preprocessed images. */ export type FluxKreaTrainerOutput = { /** * URL to the training configuration file. */ config_file: File; /** * URL to the preprocessed images. */ debug_preprocessed_output?: File; /** * URL to the trained diffusers lora weights. */ diffusers_lora_file: File; }; /** * Input parameters for LoRA image generation guided by a canny input image (`image_url`). */ export type FluxLoraCannyInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `30` */ guidance_scale?: number; /** * The size of the generated image.
*/ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to use for canny input. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): the element type is not declared in this part of the file; `unknown` is a placeholder until the concrete LoRA type is confirmed. */ loras?: Array<unknown>; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; /** * Input parameters for LoRA image generation guided by a depth input image (`image_url`). */ export type FluxLoraDepthInput = { /** * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"none"` */ acceleration?: "none" | "regular"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to use for depth input. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation.
You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): the element type is not declared in this part of the file; `unknown` is a placeholder until the concrete LoRA type is confirmed. */ loras?: Array<unknown>; /** * The number of images to generate. This is always set to 1 for streaming output. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for filling the masked area (`mask_url`) of an image (`image_url`), optionally using LoRAs and a fill image. */ export type FluxLoraFillInput = { /** * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"none"` */ acceleration?: "none" | "regular"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Use an image fill input to fill in particular images into the masked area. */ fill_image?: ImageFillInput; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `30` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to use for fill operation. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): the element type is not declared in this part of the file; `unknown` is a placeholder until the concrete LoRA type is confirmed. */ loras?: Array<unknown>; /** * The mask of the area to inpaint. */ mask_url: string | Blob | File; /** * The number of images to generate. This is always set to 1 for streaming output.
Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * Specifies whether to paste back the original image onto the non-inpainted areas of the output. Default value: `true` */ paste_back?: boolean; /** * The prompt to generate an image from. Default value: `""` */ prompt?: string; /** * Resizes the image back to the original size. Use when you wish to preserve the exact image size as the originally provided image. */ resize_to_original?: boolean; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for LoRA image-to-image generation from a source image (`image_url`). */ export type FluxLoraImageToImageInput = { /** * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"none"` */ acceleration?: "none" | "regular"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to use for inpainting or img2img. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): the element type is not declared in this part of the file; `unknown` is a placeholder until the concrete LoRA type is confirmed. */ loras?: Array<unknown>; /** * The number of images to generate. This is always set to 1 for streaming output.
Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for generating images from a text prompt, optionally with LoRAs. */ export type FluxLoraInput = { /** * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"none"` */ acceleration?: "none" | "regular"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): the element type is not declared in this part of the file; `unknown` is a placeholder until the concrete LoRA type is confirmed. */ loras?: Array<unknown>; /** * The number of images to generate. This is always set to 1 for streaming output. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image.
Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for training a LoRA from a zip archive of images (`images_data_url`), with optional masking and subject cropping. */ export type FluxLoraPortraitTrainerInput = { /** * If True, masks will be created for the subject. */ create_masks?: boolean; /** * The format of the archive. If not specified, the format will be inferred from the URL. */ data_archive_format?: string; /** * URL to zip archive with images of a consistent style. Try to use at least 10 images, although more is better. * * In addition to images the archive can contain text files with captions. Each text file should have the same name as the image file it corresponds to. * * The captions can include a special string `[trigger]`. If a trigger phrase (`trigger_phrase`) is specified, it will replace `[trigger]` in the captions. */ images_data_url: string | Blob | File; /** * Learning rate to use for training. Default value: `0.00009` */ learning_rate?: number; /** * If True, multiresolution training will be used. Default value: `true` */ multiresolution_training?: boolean; /** * URL to a checkpoint to resume training from. Default value: `""` */ resume_from_checkpoint?: string; /** * Number of steps to train the LoRA on. Default value: `2500` */ steps?: number; /** * If True, the subject will be cropped from the image. Default value: `true` */ subject_crop?: boolean; /** * Trigger phrase to be used in the captions. If None, a trigger phrase will not be used. * If no captions are provided, the trigger phrase will be used instead of captions. If captions are provided, the trigger phrase will replace the `[trigger]` string in the captions.
*/ trigger_phrase?: string; }; /** * Input parameters for generating an image with a finetuned model (`finetune_id`), guided by a control image from which a Canny edge map is generated. */ export type FluxProCannyControlFinetunedInput = { /** * The control image URL to generate the Canny edge map from. */ control_image_url: string | Blob | File; /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * References your specific model */ finetune_id: string; /** * Controls finetune influence. * Increase this value if your target concept isn't showing up strongly enough. * The optimal setting depends on your finetune and prompt */ finetune_strength: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `30` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for generating an image guided by a control image from which a Canny edge map is generated. */ export type FluxProCannyControlInput = { /** * The control image URL to generate the Canny edge map from. */ control_image_url: string | Blob | File; /** * Whether to enhance the prompt for better results.
*/ enhance_prompt?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for filling the masked part of an image (`image_url` + `mask_url`) with a finetuned model (`finetune_id`). */ export type FluxProFillFinetunedInput = { /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * References your specific model */ finetune_id: string; /** * Controls finetune influence. * Increase this value if your target concept isn't showing up strongly enough. * The optimal setting depends on your finetune and prompt */ finetune_strength: number; /** * The image URL to generate an image from. Needs to match the dimensions of the mask. */ image_url: string | Blob | File; /** * The mask URL to inpaint the image. Needs to match the dimensions of the input image. */ mask_url: string | Blob | File; /** * The number of images to generate.
Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to fill the masked part of the image. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for filling the masked part of an image; `image_url` and `mask_url` must have matching dimensions. */ export type FluxProFillInput = { /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The image URL to generate an image from. Needs to match the dimensions of the mask. */ image_url: string | Blob | File; /** * The mask URL to inpaint the image. Needs to match the dimensions of the input image. */ mask_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to fill the masked part of the image. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for generating an image from a text prompt at a chosen aspect ratio. */ export type FluxProKontextTextToImageInput = { /** * The aspect ratio of the generated image.
Default value: `"1:1"` */ aspect_ratio?: "21:9" | "16:9" | "4:3" | "3:2" | "1:1" | "2:3" | "3:4" | "9:16" | "9:21"; /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input parameters for generating an image from a text prompt at a chosen image size. */ export type FluxProNewInput = { /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from.
*/ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Result of a generation request: the generated images, their NSFW flags, and the prompt and seed that were used. */ export type FluxProNewOutput = { /** * Whether the generated images contain NSFW concepts. NOTE(review): presumably one flag per generated image; confirm. */ has_nsfw_concepts: Array<boolean>; /** * The generated image files info. NOTE(review): the element type is not declared in this part of the file; `unknown` is a placeholder until the concrete image record type is confirmed. */ images: Array<unknown>; /** * The prompt used for generating the image. */ prompt: string; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** * Timing data for the request. NOTE(review): the shape is not declared here, hence `unknown`. */ timings: unknown; }; /** * Input parameters for expanding an image (`image_url`) beyond its borders by a number of pixels on each side. */ export type FluxProOutpaintInput = { /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * Pixels to expand at the bottom */ expand_bottom?: number; /** * Pixels to expand on the left */ expand_left?: number; /** * Pixels to expand on the right */ expand_right?: number; /** * Pixels to expand at the top */ expand_top?: number; /** * The image URL to expand using outpainting */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` * NOTE(review): the type also accepts "6", which this description does not cover; confirm its meaning. */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time.
*/ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; };

/**
 * Input type `FluxProTextToImageFinetunedInput`: prompt-driven image
 * generation settings plus the fine-tune reference (`finetune_id`,
 * `finetune_strength`). Only `finetune_id`, `finetune_strength` and `prompt`
 * are required.
 */
export type FluxProTextToImageFinetunedInput = {
  /** Whether to enhance the prompt for better results. */
  enhance_prompt?: boolean;

  /** References your specific model. */
  finetune_id: string;

  /**
   * Controls finetune influence. Increase this value if your target concept
   * isn't showing up strongly enough. The optimal setting depends on your
   * finetune and prompt.
   */
  finetune_strength: number;

  /**
   * CFG (Classifier Free Guidance) scale: a measure of how closely the model
   * should stick to the prompt when looking for a related image.
   *
   * @defaultValue `3.5`
   */
  guidance_scale?: number;

  /**
   * The size of the generated image: either an `ImageSize` value or one of
   * the named presets.
   *
   * @defaultValue `landscape_4_3`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";

  /**
   * The number of images to generate.
   *
   * @defaultValue `1`
   */
  num_images?: number;

  /**
   * The number of inference steps to perform.
   *
   * @defaultValue `28`
   */
  num_inference_steps?: number;

  /**
   * The format of the generated image.
   *
   * @defaultValue `"jpeg"`
   */
  output_format?: "jpeg" | "png";

  /** The prompt to generate an image from. */
  prompt: string;

  /**
   * The safety tolerance level for the generated image, 1 being the most
   * strict and 5 being the most permissive.
   *
   * NOTE(review): the union also accepts `"6"`, which the description above
   * does not cover — confirm its meaning against the endpoint schema.
   *
   * @defaultValue `"2"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";

  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;

  /**
   * If `True`, the media will be returned as a data URI and the output data
   * won't be available in the request history.
   */
  sync_mode?: boolean;
};

export type FluxProV11Input = { /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The size of the generated image. 
Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type FluxProV11UltraFinetunedInput = { /** * The aspect ratio of the generated image. Default value: `16:9` */ aspect_ratio?: "21:9" | "16:9" | "4:3" | "3:2" | "1:1" | "2:3" | "3:4" | "9:16" | "9:21" | string; /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * References your specific model */ finetune_id: string; /** * Controls finetune influence. * Increase this value if your target concept isn't showing up strongly enough. * The optimal setting depends on your finetune and prompt */ finetune_strength: number; /** * The strength of the image prompt, between 0 and 1. Default value: `0.1` */ image_prompt_strength?: number; /** * The image URL to generate an image from. */ image_url?: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * Generate less processed, more natural-looking images. 
*/ raw?: boolean; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; };

/**
 * Input type `FluxProV11UltraInput`: a required `prompt`, an optional image
 * prompt (`image_url` with `image_prompt_strength`) and output settings.
 */
export type FluxProV11UltraInput = {
  /**
   * The aspect ratio of the generated image. The listed literals are
   * accompanied by `string`, so the type checker accepts any string here.
   *
   * @defaultValue `16:9`
   */
  aspect_ratio?:
    | "21:9"
    | "16:9"
    | "4:3"
    | "3:2"
    | "1:1"
    | "2:3"
    | "3:4"
    | "9:16"
    | "9:21"
    | string;

  /** Whether to enhance the prompt for better results. */
  enhance_prompt?: boolean;

  /**
   * The strength of the image prompt, between 0 and 1.
   *
   * @defaultValue `0.1`
   */
  image_prompt_strength?: number;

  /** The image URL to generate an image from. */
  image_url?: string | Blob | File;

  /**
   * The number of images to generate.
   *
   * @defaultValue `1`
   */
  num_images?: number;

  /**
   * The format of the generated image.
   *
   * @defaultValue `"jpeg"`
   */
  output_format?: "jpeg" | "png";

  /** The prompt to generate an image from. */
  prompt: string;

  /** Generate less processed, more natural-looking images. */
  raw?: boolean;

  /**
   * The safety tolerance level for the generated image, 1 being the most
   * strict and 5 being the most permissive.
   *
   * NOTE(review): the union also accepts `"6"`, which the description above
   * does not cover — confirm its meaning against the endpoint schema.
   *
   * @defaultValue `"2"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";

  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;

  /**
   * If `True`, the media will be returned as a data URI and the output data
   * won't be available in the request history.
   */
  sync_mode?: boolean;
};

export type FluxProV11UltraReduxInput = { /** * The aspect ratio of the generated image. 
Default value: `16:9` */ aspect_ratio?: "21:9" | "16:9" | "4:3" | "3:2" | "1:1" | "2:3" | "3:4" | "9:16" | "9:21" | string; /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The strength of the image prompt, between 0 and 1. Default value: `0.1` */ image_prompt_strength?: number; /** * The image URL to generate an image from. Needs to match the dimensions of the mask. */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. Default value: `""` */ prompt?: string; /** * Generate less processed, more natural-looking images. */ raw?: boolean; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type FluxProV1ReduxInput = { /** * Whether to enhance the prompt for better results. */ enhance_prompt?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The image URL to generate an image from. Needs to match the dimensions of the mask. */ image_url: string | Blob | File; /** * The number of images to generate. 
Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. Default value: `""` */ prompt?: string; /** * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type FluxPulidInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * The weight of the ID loss. Default value: `1` */ id_weight?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The maximum sequence length for the model. Default value: `"128"` */ max_sequence_length?: "128" | "256" | "512"; /** * The negative prompt. NOTE(review): the generated description for this field was a copy of the one on `prompt` ("The prompt to generate an image from."); presumably this describes what the image should avoid - confirm against the endpoint schema. Default value: `""` */ negative_prompt?: string; /** * The number of inference steps to perform. Default value: `20` */ num_inference_steps?: number; /** * The prompt to generate an image from. */ prompt: string; /** * URL of image to use for inpainting. 
*/ reference_image_url: string | Blob | File; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * The number of steps to start the CFG from. */ start_step?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The weight of the CFG loss. Default value: `1` */ true_cfg?: number; };

/**
 * Input type `FluxSchnellReduxInput`: generates images from a source image
 * (`image_url`, the only required field) with optional size, step count,
 * acceleration and output settings.
 */
export type FluxSchnellReduxInput = {
  /**
   * The speed of the generation. The higher the speed, the faster the
   * generation.
   *
   * @defaultValue `"none"`
   */
  acceleration?: "none" | "regular" | "high";

  /**
   * If set to true, the safety checker will be enabled.
   *
   * @defaultValue `true`
   */
  enable_safety_checker?: boolean;

  /**
   * The size of the generated image: either an `ImageSize` value or one of
   * the named presets.
   *
   * @defaultValue `landscape_4_3`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";

  /** The URL of the image to generate an image from. */
  image_url: string | Blob | File;

  /**
   * The number of images to generate.
   *
   * @defaultValue `1`
   */
  num_images?: number;

  /**
   * The number of inference steps to perform.
   *
   * @defaultValue `4`
   */
  num_inference_steps?: number;

  /**
   * The format of the generated image.
   *
   * @defaultValue `"jpeg"`
   */
  output_format?: "jpeg" | "png";

  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;

  /**
   * If `True`, the media will be returned as a data URI and the output data
   * won't be available in the request history.
   */
  sync_mode?: boolean;
};

export type FluxSubjectInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. 
Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image of the subject */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `8` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; };

/**
 * Input type `FluxVisionUpscalerInput`: the image to upscale (`image_url`,
 * the only required field) plus optional sampling controls.
 */
export type FluxVisionUpscalerInput = {
  /**
   * The creativity of the model. The higher the creativity, the more the
   * model will deviate from the original. Refers to the denoise strength of
   * the sampling.
   *
   * @defaultValue `0.3`
   */
  creativity?: number;

  /**
   * Whether to enable the safety checker.
   *
   * @defaultValue `true`
   */
  enable_safety_checker?: boolean;

  /**
   * CFG/guidance scale (1-4). Controls how closely the model follows the
   * prompt.
   *
   * @defaultValue `1`
   */
  guidance?: number;

  /** The URL of the image to upscale. */
  image_url: string | Blob | File;

  /**
   * The seed to use for the upscale. If not provided, a random seed will be
   * used.
   */
  seed?: number;

  /**
   * Number of inference steps (4-50).
   *
   * @defaultValue `20`
   */
  steps?: number;

  /**
   * The upscale factor (1-4x).
   *
   * @defaultValue `2`
   */
  upscale_factor?: number;
};

export type FluxVisionUpscalerOutput = { /** * The VLM-generated caption used for upscaling. 
*/ caption: string; /** * The URL of the generated image. */ image: Image; /** * The seed used to generate the image. */ seed: number; /** * The timings of the different steps in the workflow. */ timings: unknown; }; export type FooocusImagePromptInput = { /** * The size of the generated image. You can choose between some presets or * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` */ aspect_ratio?: string; /** * If set to false, the safety checker will be disabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * */ image_prompt_1: ImagePrompt; /** * */ image_prompt_2?: ImagePrompt; /** * */ image_prompt_3?: ImagePrompt; /** * */ image_prompt_4?: ImagePrompt; /** * Describe what you want to inpaint. Default value: `""` */ inpaint_additional_prompt?: string; /** * The image to use as a reference for inpainting. */ inpaint_image_url?: string | Blob | File; /** * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` */ inpaint_mode?: "Inpaint or Outpaint (default)" | "Improve Detail (face, hand, eyes, etc.)" | "Modify Content (add objects, change background, etc.)"; /** * The LoRAs to use for the image generation. You can use up to 5 LoRAs * and they will be merged together to generate the final image. * NOTE(review): the element type was missing (a bare `Array` does not compile); `unknown` is a placeholder that accepts any element - replace it with the schema's LoRA entry type once confirmed. */ loras?: Array<unknown>; /** * The image to use as a mask for the generated image. */ mask_image_url?: string | Blob | File; /** * Mixing Image Prompt and Inpaint */ mixing_image_prompt_and_inpaint?: boolean; /** * Mixing Image Prompt and Vary/Upscale */ mixing_image_prompt_and_vary_upscale?: boolean; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. 
moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * Number of images to generate in one request Default value: `1` */ num_images?: number; /** * The directions to outpaint. */ outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "png" | "jpeg" | "webp"; /** * You can choose Speed or Quality Default value: `"Extreme Speed"` */ performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` */ prompt?: string; /** * Refiner (SDXL or SD 1.5) Default value: `"None"` */ refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; /** * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` */ refiner_switch?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The sharpness of the generated image. Use it to control how sharp the generated * image should be. Higher value means image and texture are sharper. Default value: `2` */ sharpness?: number; /** * The style to use. 
*/ styles?: Array<"Fooocus V2" | "Fooocus Enhance" | "Fooocus Sharp" | "Fooocus Semi Realistic" | "Fooocus Masterpiece" | "Fooocus Photograph" | "Fooocus Negative" | "Fooocus Cinematic" | "SAI 3D Model" | "SAI Analog Film" | "SAI Anime" | "SAI Cinematic" | "SAI Comic Book" | "SAI Craft Clay" | "SAI Digital Art" | "SAI Enhance" | "SAI Fantasy Art" | "SAI Isometric" | "SAI Line Art" | "SAI Lowpoly" | "SAI Neonpunk" | "SAI Origami" | "SAI Photographic" | "SAI Pixel Art" | "SAI Texture" | "MRE Cinematic Dynamic" | "MRE Spontaneous Picture" | "MRE Artistic Vision" | "MRE Dark Dream" | "MRE Gloomy Art" | "MRE Bad Dream" | "MRE Underground" | "MRE Surreal Painting" | "MRE Dynamic Illustration" | "MRE Undead Art" | "MRE Elemental Art" | "MRE Space Art" | "MRE Ancient Illustration" | "MRE Brave Art" | "MRE Heroic Fantasy" | "MRE Dark Cyberpunk" | "MRE Lyrical Geometry" | "MRE Sumi E Symbolic" | "MRE Sumi E Detailed" | "MRE Manga" | "MRE Anime" | "MRE Comic" | "Ads Advertising" | "Ads Automotive" | "Ads Corporate" | "Ads Fashion Editorial" | "Ads Food Photography" | "Ads Gourmet Food Photography" | "Ads Luxury" | "Ads Real Estate" | "Ads Retail" | "Artstyle Abstract" | "Artstyle Abstract Expressionism" | "Artstyle Art Deco" | "Artstyle Art Nouveau" | "Artstyle Constructivist" | "Artstyle Cubist" | "Artstyle Expressionist" | "Artstyle Graffiti" | "Artstyle Hyperrealism" | "Artstyle Impressionist" | "Artstyle Pointillism" | "Artstyle Pop Art" | "Artstyle Psychedelic" | "Artstyle Renaissance" | "Artstyle Steampunk" | "Artstyle Surrealist" | "Artstyle Typography" | "Artstyle Watercolor" | "Futuristic Biomechanical" | "Futuristic Biomechanical Cyberpunk" | "Futuristic Cybernetic" | "Futuristic Cybernetic Robot" | "Futuristic Cyberpunk Cityscape" | "Futuristic Futuristic" | "Futuristic Retro Cyberpunk" | "Futuristic Retro Futurism" | "Futuristic Sci Fi" | "Futuristic Vaporwave" | "Game Bubble Bobble" | "Game Cyberpunk Game" | "Game Fighting Game" | "Game Gta" | "Game Mario" | 
"Game Minecraft" | "Game Pokemon" | "Game Retro Arcade" | "Game Retro Game" | "Game Rpg Fantasy Game" | "Game Strategy Game" | "Game Streetfighter" | "Game Zelda" | "Misc Architectural" | "Misc Disco" | "Misc Dreamscape" | "Misc Dystopian" | "Misc Fairy Tale" | "Misc Gothic" | "Misc Grunge" | "Misc Horror" | "Misc Kawaii" | "Misc Lovecraftian" | "Misc Macabre" | "Misc Manga" | "Misc Metropolis" | "Misc Minimalist" | "Misc Monochrome" | "Misc Nautical" | "Misc Space" | "Misc Stained Glass" | "Misc Techwear Fashion" | "Misc Tribal" | "Misc Zentangle" | "Papercraft Collage" | "Papercraft Flat Papercut" | "Papercraft Kirigami" | "Papercraft Paper Mache" | "Papercraft Paper Quilling" | "Papercraft Papercut Collage" | "Papercraft Papercut Shadow Box" | "Papercraft Stacked Papercut" | "Papercraft Thick Layered Papercut" | "Photo Alien" | "Photo Film Noir" | "Photo Glamour" | "Photo Hdr" | "Photo Iphone Photographic" | "Photo Long Exposure" | "Photo Neon Noir" | "Photo Silhouette" | "Photo Tilt Shift" | "Cinematic Diva" | "Abstract Expressionism" | "Academia" | "Action Figure" | "Adorable 3D Character" | "Adorable Kawaii" | "Art Deco" | "Art Nouveau" | "Astral Aura" | "Avant Garde" | "Baroque" | "Bauhaus Style Poster" | "Blueprint Schematic Drawing" | "Caricature" | "Cel Shaded Art" | "Character Design Sheet" | "Classicism Art" | "Color Field Painting" | "Colored Pencil Art" | "Conceptual Art" | "Constructivism" | "Cubism" | "Dadaism" | "Dark Fantasy" | "Dark Moody Atmosphere" | "Dmt Art Style" | "Doodle Art" | "Double Exposure" | "Dripping Paint Splatter Art" | "Expressionism" | "Faded Polaroid Photo" | "Fauvism" | "Flat 2d Art" | "Fortnite Art Style" | "Futurism" | "Glitchcore" | "Glo Fi" | "Googie Art Style" | "Graffiti Art" | "Harlem Renaissance Art" | "High Fashion" | "Idyllic" | "Impressionism" | "Infographic Drawing" | "Ink Dripping Drawing" | "Japanese Ink Drawing" | "Knolling Photography" | "Light Cheery Atmosphere" | "Logo Design" | "Luxurious Elegance" | "Macro 
Photography" | "Mandola Art" | "Marker Drawing" | "Medievalism" | "Minimalism" | "Neo Baroque" | "Neo Byzantine" | "Neo Futurism" | "Neo Impressionism" | "Neo Rococo" | "Neoclassicism" | "Op Art" | "Ornate And Intricate" | "Pencil Sketch Drawing" | "Pop Art 2" | "Rococo" | "Silhouette Art" | "Simple Vector Art" | "Sketchup" | "Steampunk 2" | "Surrealism" | "Suprematism" | "Terragen" | "Tranquil Relaxing Atmosphere" | "Sticker Designs" | "Vibrant Rim Light" | "Volumetric Lighting" | "Watercolor 2" | "Whimsical And Playful" | "Mk Chromolithography" | "Mk Cross Processing Print" | "Mk Dufaycolor Photograph" | "Mk Herbarium" | "Mk Punk Collage" | "Mk Mosaic" | "Mk Van Gogh" | "Mk Coloring Book" | "Mk Singer Sargent" | "Mk Pollock" | "Mk Basquiat" | "Mk Andy Warhol" | "Mk Halftone Print" | "Mk Gond Painting" | "Mk Albumen Print" | "Mk Aquatint Print" | "Mk Anthotype Print" | "Mk Inuit Carving" | "Mk Bromoil Print" | "Mk Calotype Print" | "Mk Color Sketchnote" | "Mk Cibulak Porcelain" | "Mk Alcohol Ink Art" | "Mk One Line Art" | "Mk Blacklight Paint" | "Mk Carnival Glass" | "Mk Cyanotype Print" | "Mk Cross Stitching" | "Mk Encaustic Paint" | "Mk Embroidery" | "Mk Gyotaku" | "Mk Luminogram" | "Mk Lite Brite Art" | "Mk Mokume Gane" | "Pebble Art" | "Mk Palekh" | "Mk Suminagashi" | "Mk Scrimshaw" | "Mk Shibori" | "Mk Vitreous Enamel" | "Mk Ukiyo E" | "Mk Vintage Airline Poster" | "Mk Vintage Travel Poster" | "Mk Bauhaus Style" | "Mk Afrofuturism" | "Mk Atompunk" | "Mk Constructivism" | "Mk Chicano Art" | "Mk De Stijl" | "Mk Dayak Art" | "Mk Fayum Portrait" | "Mk Illuminated Manuscript" | "Mk Kalighat Painting" | "Mk Madhubani Painting" | "Mk Pictorialism" | "Mk Pichwai Painting" | "Mk Patachitra Painting" | "Mk Samoan Art Inspired" | "Mk Tlingit Art" | "Mk Adnate Style" | "Mk Ron English Style" | "Mk Shepard Fairey Style">; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; /** * The image to upscale or vary. */ uov_image_url?: string | Blob | File; /** * The method to use for upscaling or varying. Default value: `"Disabled"` */ uov_method?: "Disabled" | "Vary (Subtle)" | "Vary (Strong)" | "Upscale (1.5x)" | "Upscale (2x)" | "Upscale (Fast 2x)"; }; export type FooocusInpaintInput = { /** * The size of the generated image. You can choose between some presets or * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` */ aspect_ratio?: string; /** * If set to false, the safety checker will be disabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * */ image_prompt_1?: ImagePrompt; /** * */ image_prompt_2?: ImagePrompt; /** * */ image_prompt_3?: ImagePrompt; /** * */ image_prompt_4?: ImagePrompt; /** * Describe what you want to inpaint. Default value: `""` */ inpaint_additional_prompt?: string; /** * If set to true, the initial preprocessing will be disabled. */ inpaint_disable_initial_latent?: boolean; /** * Version of Fooocus inpaint model Default value: `"v2.6"` */ inpaint_engine?: "None" | "v1" | "v2.5" | "v2.6"; /** * Positive value will make white area in the mask larger, negative value will * make white area smaller. (default is 0, always process before any mask * invert) */ inpaint_erode_or_dilate?: number; /** * The image to use as a reference for inpainting. */ inpaint_image_url: string | Blob | File; /** * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` */ inpaint_mode?: "Inpaint or Outpaint (default)" | "Improve Detail (face, hand, eyes, etc.)" | "Modify Content (add objects, change background, etc.)"; /** * The area to inpaint. Value 0 is same as "Only Masked" in A1111. Value 1 is * same as "Whole Image" in A1111. 
Only used in inpaint, not used in outpaint. * (Outpaint always use 1.0) Default value: `0.618` */ inpaint_respective_field?: number; /** * Same as the denoising strength in A1111 inpaint. Only used in inpaint, not * used in outpaint. (Outpaint always use 1.0) Default value: `1` */ inpaint_strength?: number; /** * If set to true, the mask will be inverted. */ invert_mask?: boolean; /** * The LoRAs to use for the image generation. You can use up to 5 LoRAs * and they will be merged together to generate the final image. * NOTE(review): the element type was missing (a bare `Array` does not compile); `unknown` is a placeholder that accepts any element - replace it with the schema's LoRA entry type once confirmed. */ loras?: Array<unknown>; /** * The image to use as a mask for the generated image. */ mask_image_url?: string | Blob | File; /** * Mixing Image Prompt and Inpaint */ mixing_image_prompt_and_inpaint?: boolean; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * Number of images to generate in one request Default value: `1` */ num_images?: number; /** * The directions to outpaint. */ outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "png" | "jpeg" | "webp"; /** * If set to true, the advanced inpaint options ('inpaint_disable_initial_latent', * 'inpaint_engine', 'inpaint_strength', 'inpaint_respective_field', * 'inpaint_erode_or_dilate') will be overridden. * Otherwise, the default values will be used. */ override_inpaint_options?: boolean; /** * You can choose Speed or Quality Default value: `"Extreme Speed"` */ performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. 
Default value: `""` */ prompt?: string; /** * Refiner (SDXL or SD 1.5) Default value: `"None"` */ refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; /** * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` */ refiner_switch?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The sharpness of the generated image. Use it to control how sharp the generated * image should be. Higher value means image and texture are sharper. Default value: `2` */ sharpness?: number; /** * The style to use. */ styles?: Array<"Fooocus V2" | "Fooocus Enhance" | "Fooocus Sharp" | "Fooocus Semi Realistic" | "Fooocus Masterpiece" | "Fooocus Photograph" | "Fooocus Negative" | "Fooocus Cinematic" | "SAI 3D Model" | "SAI Analog Film" | "SAI Anime" | "SAI Cinematic" | "SAI Comic Book" | "SAI Craft Clay" | "SAI Digital Art" | "SAI Enhance" | "SAI Fantasy Art" | "SAI Isometric" | "SAI Line Art" | "SAI Lowpoly" | "SAI Neonpunk" | "SAI Origami" | "SAI Photographic" | "SAI Pixel Art" | "SAI Texture" | "MRE Cinematic Dynamic" | "MRE Spontaneous Picture" | "MRE Artistic Vision" | "MRE Dark Dream" | "MRE Gloomy Art" | "MRE Bad Dream" | "MRE Underground" | "MRE Surreal Painting" | "MRE Dynamic Illustration" | "MRE Undead Art" | "MRE Elemental Art" | "MRE Space Art" | "MRE Ancient Illustration" | "MRE Brave Art" | "MRE Heroic Fantasy" | "MRE Dark Cyberpunk" | "MRE Lyrical Geometry" | "MRE Sumi E Symbolic" | "MRE Sumi E Detailed" | "MRE Manga" | "MRE Anime" | "MRE Comic" | "Ads Advertising" | "Ads Automotive" | "Ads Corporate" | "Ads Fashion Editorial" | "Ads Food Photography" | "Ads Gourmet Food Photography" | "Ads Luxury" | "Ads Real Estate" | "Ads Retail" | "Artstyle Abstract" | "Artstyle Abstract Expressionism" | "Artstyle Art Deco" | "Artstyle Art Nouveau" | "Artstyle 
Constructivist" | "Artstyle Cubist" | "Artstyle Expressionist" | "Artstyle Graffiti" | "Artstyle Hyperrealism" | "Artstyle Impressionist" | "Artstyle Pointillism" | "Artstyle Pop Art" | "Artstyle Psychedelic" | "Artstyle Renaissance" | "Artstyle Steampunk" | "Artstyle Surrealist" | "Artstyle Typography" | "Artstyle Watercolor" | "Futuristic Biomechanical" | "Futuristic Biomechanical Cyberpunk" | "Futuristic Cybernetic" | "Futuristic Cybernetic Robot" | "Futuristic Cyberpunk Cityscape" | "Futuristic Futuristic" | "Futuristic Retro Cyberpunk" | "Futuristic Retro Futurism" | "Futuristic Sci Fi" | "Futuristic Vaporwave" | "Game Bubble Bobble" | "Game Cyberpunk Game" | "Game Fighting Game" | "Game Gta" | "Game Mario" | "Game Minecraft" | "Game Pokemon" | "Game Retro Arcade" | "Game Retro Game" | "Game Rpg Fantasy Game" | "Game Strategy Game" | "Game Streetfighter" | "Game Zelda" | "Misc Architectural" | "Misc Disco" | "Misc Dreamscape" | "Misc Dystopian" | "Misc Fairy Tale" | "Misc Gothic" | "Misc Grunge" | "Misc Horror" | "Misc Kawaii" | "Misc Lovecraftian" | "Misc Macabre" | "Misc Manga" | "Misc Metropolis" | "Misc Minimalist" | "Misc Monochrome" | "Misc Nautical" | "Misc Space" | "Misc Stained Glass" | "Misc Techwear Fashion" | "Misc Tribal" | "Misc Zentangle" | "Papercraft Collage" | "Papercraft Flat Papercut" | "Papercraft Kirigami" | "Papercraft Paper Mache" | "Papercraft Paper Quilling" | "Papercraft Papercut Collage" | "Papercraft Papercut Shadow Box" | "Papercraft Stacked Papercut" | "Papercraft Thick Layered Papercut" | "Photo Alien" | "Photo Film Noir" | "Photo Glamour" | "Photo Hdr" | "Photo Iphone Photographic" | "Photo Long Exposure" | "Photo Neon Noir" | "Photo Silhouette" | "Photo Tilt Shift" | "Cinematic Diva" | "Abstract Expressionism" | "Academia" | "Action Figure" | "Adorable 3D Character" | "Adorable Kawaii" | "Art Deco" | "Art Nouveau" | "Astral Aura" | "Avant Garde" | "Baroque" | "Bauhaus Style Poster" | "Blueprint Schematic Drawing" | 
"Caricature" | "Cel Shaded Art" | "Character Design Sheet" | "Classicism Art" | "Color Field Painting" | "Colored Pencil Art" | "Conceptual Art" | "Constructivism" | "Cubism" | "Dadaism" | "Dark Fantasy" | "Dark Moody Atmosphere" | "Dmt Art Style" | "Doodle Art" | "Double Exposure" | "Dripping Paint Splatter Art" | "Expressionism" | "Faded Polaroid Photo" | "Fauvism" | "Flat 2d Art" | "Fortnite Art Style" | "Futurism" | "Glitchcore" | "Glo Fi" | "Googie Art Style" | "Graffiti Art" | "Harlem Renaissance Art" | "High Fashion" | "Idyllic" | "Impressionism" | "Infographic Drawing" | "Ink Dripping Drawing" | "Japanese Ink Drawing" | "Knolling Photography" | "Light Cheery Atmosphere" | "Logo Design" | "Luxurious Elegance" | "Macro Photography" | "Mandola Art" | "Marker Drawing" | "Medievalism" | "Minimalism" | "Neo Baroque" | "Neo Byzantine" | "Neo Futurism" | "Neo Impressionism" | "Neo Rococo" | "Neoclassicism" | "Op Art" | "Ornate And Intricate" | "Pencil Sketch Drawing" | "Pop Art 2" | "Rococo" | "Silhouette Art" | "Simple Vector Art" | "Sketchup" | "Steampunk 2" | "Surrealism" | "Suprematism" | "Terragen" | "Tranquil Relaxing Atmosphere" | "Sticker Designs" | "Vibrant Rim Light" | "Volumetric Lighting" | "Watercolor 2" | "Whimsical And Playful" | "Mk Chromolithography" | "Mk Cross Processing Print" | "Mk Dufaycolor Photograph" | "Mk Herbarium" | "Mk Punk Collage" | "Mk Mosaic" | "Mk Van Gogh" | "Mk Coloring Book" | "Mk Singer Sargent" | "Mk Pollock" | "Mk Basquiat" | "Mk Andy Warhol" | "Mk Halftone Print" | "Mk Gond Painting" | "Mk Albumen Print" | "Mk Aquatint Print" | "Mk Anthotype Print" | "Mk Inuit Carving" | "Mk Bromoil Print" | "Mk Calotype Print" | "Mk Color Sketchnote" | "Mk Cibulak Porcelain" | "Mk Alcohol Ink Art" | "Mk One Line Art" | "Mk Blacklight Paint" | "Mk Carnival Glass" | "Mk Cyanotype Print" | "Mk Cross Stitching" | "Mk Encaustic Paint" | "Mk Embroidery" | "Mk Gyotaku" | "Mk Luminogram" | "Mk Lite Brite Art" | "Mk Mokume Gane" | "Pebble Art" | "Mk 
Palekh" | "Mk Suminagashi" | "Mk Scrimshaw" | "Mk Shibori" | "Mk Vitreous Enamel" | "Mk Ukiyo E" | "Mk Vintage Airline Poster" | "Mk Vintage Travel Poster" | "Mk Bauhaus Style" | "Mk Afrofuturism" | "Mk Atompunk" | "Mk Constructivism" | "Mk Chicano Art" | "Mk De Stijl" | "Mk Dayak Art" | "Mk Fayum Portrait" | "Mk Illuminated Manuscript" | "Mk Kalighat Painting" | "Mk Madhubani Painting" | "Mk Pictorialism" | "Mk Pichwai Painting" | "Mk Patachitra Painting" | "Mk Samoan Art Inspired" | "Mk Tlingit Art" | "Mk Adnate Style" | "Mk Ron English Style" | "Mk Shepard Fairey Style">; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type fooocusInput = { /** * The size of the generated image. You can choose between some presets or * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` */ aspect_ratio?: string; /** * The stop at value of the control image. Use it to control how much the generated image * should look like the control image. Default value: `1` */ control_image_stop_at?: number; /** * The image to use as a reference for the generated image. */ control_image_url?: string | Blob | File; /** * The strength of the control image. Use it to control how much the generated image * should look like the control image. Default value: `1` */ control_image_weight?: number; /** * The type of image control Default value: `"PyraCanny"` */ control_type?: "ImagePrompt" | "PyraCanny" | "CPDS" | "FaceSwap"; /** * If set to false, the safety checker will be disabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * The image to use as a reference for inpainting. 
*/ inpaint_image_url?: string | Blob | File; /** * The LoRAs to use for the image generation. You can use up to 5 LoRAs * and they will be merged together to generate the final image. */ loras?: Array; /** * The image to use as a mask for the generated image. */ mask_image_url?: string | Blob | File; /** * */ mixing_image_prompt_and_inpaint?: boolean; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * Number of images to generate in one request Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "png" | "jpeg" | "webp"; /** * You can choose Speed or Quality Default value: `"Extreme Speed"` */ performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` */ prompt?: string; /** * Refiner (SDXL or SD 1.5) Default value: `"None"` */ refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; /** * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` */ refiner_switch?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The sharpness of the generated image. Use it to control how sharp the generated * image should be. Higher value means image and texture are sharper. Default value: `2` */ sharpness?: number; /** * The style to use. 
*/ styles?: Array<"Fooocus V2" | "Fooocus Enhance" | "Fooocus Sharp" | "Fooocus Semi Realistic" | "Fooocus Masterpiece" | "Fooocus Photograph" | "Fooocus Negative" | "Fooocus Cinematic" | "SAI 3D Model" | "SAI Analog Film" | "SAI Anime" | "SAI Cinematic" | "SAI Comic Book" | "SAI Craft Clay" | "SAI Digital Art" | "SAI Enhance" | "SAI Fantasy Art" | "SAI Isometric" | "SAI Line Art" | "SAI Lowpoly" | "SAI Neonpunk" | "SAI Origami" | "SAI Photographic" | "SAI Pixel Art" | "SAI Texture" | "MRE Cinematic Dynamic" | "MRE Spontaneous Picture" | "MRE Artistic Vision" | "MRE Dark Dream" | "MRE Gloomy Art" | "MRE Bad Dream" | "MRE Underground" | "MRE Surreal Painting" | "MRE Dynamic Illustration" | "MRE Undead Art" | "MRE Elemental Art" | "MRE Space Art" | "MRE Ancient Illustration" | "MRE Brave Art" | "MRE Heroic Fantasy" | "MRE Dark Cyberpunk" | "MRE Lyrical Geometry" | "MRE Sumi E Symbolic" | "MRE Sumi E Detailed" | "MRE Manga" | "MRE Anime" | "MRE Comic" | "Ads Advertising" | "Ads Automotive" | "Ads Corporate" | "Ads Fashion Editorial" | "Ads Food Photography" | "Ads Gourmet Food Photography" | "Ads Luxury" | "Ads Real Estate" | "Ads Retail" | "Artstyle Abstract" | "Artstyle Abstract Expressionism" | "Artstyle Art Deco" | "Artstyle Art Nouveau" | "Artstyle Constructivist" | "Artstyle Cubist" | "Artstyle Expressionist" | "Artstyle Graffiti" | "Artstyle Hyperrealism" | "Artstyle Impressionist" | "Artstyle Pointillism" | "Artstyle Pop Art" | "Artstyle Psychedelic" | "Artstyle Renaissance" | "Artstyle Steampunk" | "Artstyle Surrealist" | "Artstyle Typography" | "Artstyle Watercolor" | "Futuristic Biomechanical" | "Futuristic Biomechanical Cyberpunk" | "Futuristic Cybernetic" | "Futuristic Cybernetic Robot" | "Futuristic Cyberpunk Cityscape" | "Futuristic Futuristic" | "Futuristic Retro Cyberpunk" | "Futuristic Retro Futurism" | "Futuristic Sci Fi" | "Futuristic Vaporwave" | "Game Bubble Bobble" | "Game Cyberpunk Game" | "Game Fighting Game" | "Game Gta" | "Game Mario" | 
"Game Minecraft" | "Game Pokemon" | "Game Retro Arcade" | "Game Retro Game" | "Game Rpg Fantasy Game" | "Game Strategy Game" | "Game Streetfighter" | "Game Zelda" | "Misc Architectural" | "Misc Disco" | "Misc Dreamscape" | "Misc Dystopian" | "Misc Fairy Tale" | "Misc Gothic" | "Misc Grunge" | "Misc Horror" | "Misc Kawaii" | "Misc Lovecraftian" | "Misc Macabre" | "Misc Manga" | "Misc Metropolis" | "Misc Minimalist" | "Misc Monochrome" | "Misc Nautical" | "Misc Space" | "Misc Stained Glass" | "Misc Techwear Fashion" | "Misc Tribal" | "Misc Zentangle" | "Papercraft Collage" | "Papercraft Flat Papercut" | "Papercraft Kirigami" | "Papercraft Paper Mache" | "Papercraft Paper Quilling" | "Papercraft Papercut Collage" | "Papercraft Papercut Shadow Box" | "Papercraft Stacked Papercut" | "Papercraft Thick Layered Papercut" | "Photo Alien" | "Photo Film Noir" | "Photo Glamour" | "Photo Hdr" | "Photo Iphone Photographic" | "Photo Long Exposure" | "Photo Neon Noir" | "Photo Silhouette" | "Photo Tilt Shift" | "Cinematic Diva" | "Abstract Expressionism" | "Academia" | "Action Figure" | "Adorable 3D Character" | "Adorable Kawaii" | "Art Deco" | "Art Nouveau" | "Astral Aura" | "Avant Garde" | "Baroque" | "Bauhaus Style Poster" | "Blueprint Schematic Drawing" | "Caricature" | "Cel Shaded Art" | "Character Design Sheet" | "Classicism Art" | "Color Field Painting" | "Colored Pencil Art" | "Conceptual Art" | "Constructivism" | "Cubism" | "Dadaism" | "Dark Fantasy" | "Dark Moody Atmosphere" | "Dmt Art Style" | "Doodle Art" | "Double Exposure" | "Dripping Paint Splatter Art" | "Expressionism" | "Faded Polaroid Photo" | "Fauvism" | "Flat 2d Art" | "Fortnite Art Style" | "Futurism" | "Glitchcore" | "Glo Fi" | "Googie Art Style" | "Graffiti Art" | "Harlem Renaissance Art" | "High Fashion" | "Idyllic" | "Impressionism" | "Infographic Drawing" | "Ink Dripping Drawing" | "Japanese Ink Drawing" | "Knolling Photography" | "Light Cheery Atmosphere" | "Logo Design" | "Luxurious Elegance" | "Macro 
Photography" | "Mandola Art" | "Marker Drawing" | "Medievalism" | "Minimalism" | "Neo Baroque" | "Neo Byzantine" | "Neo Futurism" | "Neo Impressionism" | "Neo Rococo" | "Neoclassicism" | "Op Art" | "Ornate And Intricate" | "Pencil Sketch Drawing" | "Pop Art 2" | "Rococo" | "Silhouette Art" | "Simple Vector Art" | "Sketchup" | "Steampunk 2" | "Surrealism" | "Suprematism" | "Terragen" | "Tranquil Relaxing Atmosphere" | "Sticker Designs" | "Vibrant Rim Light" | "Volumetric Lighting" | "Watercolor 2" | "Whimsical And Playful" | "Mk Chromolithography" | "Mk Cross Processing Print" | "Mk Dufaycolor Photograph" | "Mk Herbarium" | "Mk Punk Collage" | "Mk Mosaic" | "Mk Van Gogh" | "Mk Coloring Book" | "Mk Singer Sargent" | "Mk Pollock" | "Mk Basquiat" | "Mk Andy Warhol" | "Mk Halftone Print" | "Mk Gond Painting" | "Mk Albumen Print" | "Mk Aquatint Print" | "Mk Anthotype Print" | "Mk Inuit Carving" | "Mk Bromoil Print" | "Mk Calotype Print" | "Mk Color Sketchnote" | "Mk Cibulak Porcelain" | "Mk Alcohol Ink Art" | "Mk One Line Art" | "Mk Blacklight Paint" | "Mk Carnival Glass" | "Mk Cyanotype Print" | "Mk Cross Stitching" | "Mk Encaustic Paint" | "Mk Embroidery" | "Mk Gyotaku" | "Mk Luminogram" | "Mk Lite Brite Art" | "Mk Mokume Gane" | "Pebble Art" | "Mk Palekh" | "Mk Suminagashi" | "Mk Scrimshaw" | "Mk Shibori" | "Mk Vitreous Enamel" | "Mk Ukiyo E" | "Mk Vintage Airline Poster" | "Mk Vintage Travel Poster" | "Mk Bauhaus Style" | "Mk Afrofuturism" | "Mk Atompunk" | "Mk Constructivism" | "Mk Chicano Art" | "Mk De Stijl" | "Mk Dayak Art" | "Mk Fayum Portrait" | "Mk Illuminated Manuscript" | "Mk Kalighat Painting" | "Mk Madhubani Painting" | "Mk Pictorialism" | "Mk Pichwai Painting" | "Mk Patachitra Painting" | "Mk Samoan Art Inspired" | "Mk Tlingit Art" | "Mk Adnate Style" | "Mk Ron English Style" | "Mk Shepard Fairey Style">; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; };

export type fooocusOutput = {
  /**
   * Whether the generated images contain NSFW concepts.
   *
   * NOTE(review): the element type was missing in the source; `boolean` is
   * assumed from the description — confirm against the API schema.
   */
  has_nsfw_concepts: Array<boolean>;
  /**
   * The generated image file info.
   *
   * NOTE(review): the element type was missing in the source; typed as
   * `unknown` until the concrete image type is confirmed.
   */
  images: Array<unknown>;
  /**
   * The time taken for the generation process.
   */
  timings: unknown;
};

export type FooocusUpscaleOrVaryInput = {
  /**
   * The size of the generated image. You can choose between some presets or
   * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"`
   */
  aspect_ratio?: string;
  /**
   * If set to false, the safety checker will be disabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `4`
   */
  guidance_scale?: number;
  /**
   *
   */
  image_prompt_1?: ImagePrompt;
  /**
   *
   */
  image_prompt_2?: ImagePrompt;
  /**
   *
   */
  image_prompt_3?: ImagePrompt;
  /**
   *
   */
  image_prompt_4?: ImagePrompt;
  /**
   * The LoRAs to use for the image generation. You can use up to 5 LoRAs
   * and they will be merged together to generate the final image.
   *
   * NOTE(review): the element type was missing in the source; typed as
   * `unknown` until the concrete LoRA descriptor type is confirmed.
   */
  loras?: Array<unknown>;
  /**
   * Mixing Image Prompt and Vary/Upscale
   */
  mixing_image_prompt_and_vary_upscale?: boolean;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Number of images to generate in one request Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "png" | "jpeg" | "webp";
  /**
   * You can choose Speed or Quality Default value: `"Extreme Speed"`
   */
  performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning";
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
Default value: `""` */ prompt?: string; /** * Refiner (SDXL or SD 1.5) Default value: `"None"` */ refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; /** * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` */ refiner_switch?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The sharpness of the generated image. Use it to control how sharp the generated * image should be. Higher value means image and texture are sharper. Default value: `2` */ sharpness?: number; /** * The style to use. */ styles?: Array<"Fooocus V2" | "Fooocus Enhance" | "Fooocus Sharp" | "Fooocus Semi Realistic" | "Fooocus Masterpiece" | "Fooocus Photograph" | "Fooocus Negative" | "Fooocus Cinematic" | "SAI 3D Model" | "SAI Analog Film" | "SAI Anime" | "SAI Cinematic" | "SAI Comic Book" | "SAI Craft Clay" | "SAI Digital Art" | "SAI Enhance" | "SAI Fantasy Art" | "SAI Isometric" | "SAI Line Art" | "SAI Lowpoly" | "SAI Neonpunk" | "SAI Origami" | "SAI Photographic" | "SAI Pixel Art" | "SAI Texture" | "MRE Cinematic Dynamic" | "MRE Spontaneous Picture" | "MRE Artistic Vision" | "MRE Dark Dream" | "MRE Gloomy Art" | "MRE Bad Dream" | "MRE Underground" | "MRE Surreal Painting" | "MRE Dynamic Illustration" | "MRE Undead Art" | "MRE Elemental Art" | "MRE Space Art" | "MRE Ancient Illustration" | "MRE Brave Art" | "MRE Heroic Fantasy" | "MRE Dark Cyberpunk" | "MRE Lyrical Geometry" | "MRE Sumi E Symbolic" | "MRE Sumi E Detailed" | "MRE Manga" | "MRE Anime" | "MRE Comic" | "Ads Advertising" | "Ads Automotive" | "Ads Corporate" | "Ads Fashion Editorial" | "Ads Food Photography" | "Ads Gourmet Food Photography" | "Ads Luxury" | "Ads Real Estate" | "Ads Retail" | "Artstyle Abstract" | "Artstyle Abstract Expressionism" | "Artstyle Art Deco" | "Artstyle Art Nouveau" | "Artstyle 
Constructivist" | "Artstyle Cubist" | "Artstyle Expressionist" | "Artstyle Graffiti" | "Artstyle Hyperrealism" | "Artstyle Impressionist" | "Artstyle Pointillism" | "Artstyle Pop Art" | "Artstyle Psychedelic" | "Artstyle Renaissance" | "Artstyle Steampunk" | "Artstyle Surrealist" | "Artstyle Typography" | "Artstyle Watercolor" | "Futuristic Biomechanical" | "Futuristic Biomechanical Cyberpunk" | "Futuristic Cybernetic" | "Futuristic Cybernetic Robot" | "Futuristic Cyberpunk Cityscape" | "Futuristic Futuristic" | "Futuristic Retro Cyberpunk" | "Futuristic Retro Futurism" | "Futuristic Sci Fi" | "Futuristic Vaporwave" | "Game Bubble Bobble" | "Game Cyberpunk Game" | "Game Fighting Game" | "Game Gta" | "Game Mario" | "Game Minecraft" | "Game Pokemon" | "Game Retro Arcade" | "Game Retro Game" | "Game Rpg Fantasy Game" | "Game Strategy Game" | "Game Streetfighter" | "Game Zelda" | "Misc Architectural" | "Misc Disco" | "Misc Dreamscape" | "Misc Dystopian" | "Misc Fairy Tale" | "Misc Gothic" | "Misc Grunge" | "Misc Horror" | "Misc Kawaii" | "Misc Lovecraftian" | "Misc Macabre" | "Misc Manga" | "Misc Metropolis" | "Misc Minimalist" | "Misc Monochrome" | "Misc Nautical" | "Misc Space" | "Misc Stained Glass" | "Misc Techwear Fashion" | "Misc Tribal" | "Misc Zentangle" | "Papercraft Collage" | "Papercraft Flat Papercut" | "Papercraft Kirigami" | "Papercraft Paper Mache" | "Papercraft Paper Quilling" | "Papercraft Papercut Collage" | "Papercraft Papercut Shadow Box" | "Papercraft Stacked Papercut" | "Papercraft Thick Layered Papercut" | "Photo Alien" | "Photo Film Noir" | "Photo Glamour" | "Photo Hdr" | "Photo Iphone Photographic" | "Photo Long Exposure" | "Photo Neon Noir" | "Photo Silhouette" | "Photo Tilt Shift" | "Cinematic Diva" | "Abstract Expressionism" | "Academia" | "Action Figure" | "Adorable 3D Character" | "Adorable Kawaii" | "Art Deco" | "Art Nouveau" | "Astral Aura" | "Avant Garde" | "Baroque" | "Bauhaus Style Poster" | "Blueprint Schematic Drawing" | 
"Caricature" | "Cel Shaded Art" | "Character Design Sheet" | "Classicism Art" | "Color Field Painting" | "Colored Pencil Art" | "Conceptual Art" | "Constructivism" | "Cubism" | "Dadaism" | "Dark Fantasy" | "Dark Moody Atmosphere" | "Dmt Art Style" | "Doodle Art" | "Double Exposure" | "Dripping Paint Splatter Art" | "Expressionism" | "Faded Polaroid Photo" | "Fauvism" | "Flat 2d Art" | "Fortnite Art Style" | "Futurism" | "Glitchcore" | "Glo Fi" | "Googie Art Style" | "Graffiti Art" | "Harlem Renaissance Art" | "High Fashion" | "Idyllic" | "Impressionism" | "Infographic Drawing" | "Ink Dripping Drawing" | "Japanese Ink Drawing" | "Knolling Photography" | "Light Cheery Atmosphere" | "Logo Design" | "Luxurious Elegance" | "Macro Photography" | "Mandola Art" | "Marker Drawing" | "Medievalism" | "Minimalism" | "Neo Baroque" | "Neo Byzantine" | "Neo Futurism" | "Neo Impressionism" | "Neo Rococo" | "Neoclassicism" | "Op Art" | "Ornate And Intricate" | "Pencil Sketch Drawing" | "Pop Art 2" | "Rococo" | "Silhouette Art" | "Simple Vector Art" | "Sketchup" | "Steampunk 2" | "Surrealism" | "Suprematism" | "Terragen" | "Tranquil Relaxing Atmosphere" | "Sticker Designs" | "Vibrant Rim Light" | "Volumetric Lighting" | "Watercolor 2" | "Whimsical And Playful" | "Mk Chromolithography" | "Mk Cross Processing Print" | "Mk Dufaycolor Photograph" | "Mk Herbarium" | "Mk Punk Collage" | "Mk Mosaic" | "Mk Van Gogh" | "Mk Coloring Book" | "Mk Singer Sargent" | "Mk Pollock" | "Mk Basquiat" | "Mk Andy Warhol" | "Mk Halftone Print" | "Mk Gond Painting" | "Mk Albumen Print" | "Mk Aquatint Print" | "Mk Anthotype Print" | "Mk Inuit Carving" | "Mk Bromoil Print" | "Mk Calotype Print" | "Mk Color Sketchnote" | "Mk Cibulak Porcelain" | "Mk Alcohol Ink Art" | "Mk One Line Art" | "Mk Blacklight Paint" | "Mk Carnival Glass" | "Mk Cyanotype Print" | "Mk Cross Stitching" | "Mk Encaustic Paint" | "Mk Embroidery" | "Mk Gyotaku" | "Mk Luminogram" | "Mk Lite Brite Art" | "Mk Mokume Gane" | "Pebble Art" | "Mk 
Palekh" | "Mk Suminagashi" | "Mk Scrimshaw" | "Mk Shibori" | "Mk Vitreous Enamel" | "Mk Ukiyo E" | "Mk Vintage Airline Poster" | "Mk Vintage Travel Poster" | "Mk Bauhaus Style" | "Mk Afrofuturism" | "Mk Atompunk" | "Mk Constructivism" | "Mk Chicano Art" | "Mk De Stijl" | "Mk Dayak Art" | "Mk Fayum Portrait" | "Mk Illuminated Manuscript" | "Mk Kalighat Painting" | "Mk Madhubani Painting" | "Mk Pictorialism" | "Mk Pichwai Painting" | "Mk Patachitra Painting" | "Mk Samoan Art Inspired" | "Mk Tlingit Art" | "Mk Adnate Style" | "Mk Ron English Style" | "Mk Shepard Fairey Style">;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
  /**
   * The image to upscale or vary.
   */
  uov_image_url: string | Blob | File;
  /**
   * The method to use for upscaling or varying. Default value: `"Vary (Strong)"`
   */
  uov_method?:
    | "Disabled"
    | "Vary (Subtle)"
    | "Vary (Strong)"
    | "Upscale (1.5x)"
    | "Upscale (2x)"
    | "Upscale (Fast 2x)";
};

/**
 * A force described by its direction, strength, the mass of the object and
 * the normalized point at which it is applied.
 */
export type ForceParams = {
  /**
   * Direction of force in degrees. 0 = right, 90 = up, 180 = left, 270 = down.
   */
  angle: number;
  /**
   * Strength of the force (30 = gentle push, 400 = strong impact).
   */
  magnitude: number;
  /**
   * Mass of the object (1.0 to 4.0). Use -1 to let the model infer mass automatically. Default value: `-1`
   */
  mass?: number;
  /**
   * Normalized x position of the force application point (0 = left, 1 = right).
   */
  x: number;
  /**
   * Normalized y position of the force application point (0 = top, 1 = bottom).
   */
  y: number;
};

export type Frame = {
  /**
   * URL of the frame
   */
  url: string;
};

export type FrameInput = {
  /**
   * Type of frame to extract: first, middle, or last frame of the video Default value: `"first"`
   */
  frame_type?: "first" | "middle" | "last";
  /**
   * URL of the video file to use as the video track
   */
  video_url: string | Blob | File;
};

export type FramepackFlf2vInput = {
  /**
   * The aspect ratio of the video to generate. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16";
  /**
   * Classifier-Free Guidance scale for the generation. Default value: `1`
   */
  cfg_scale?: number;
  /**
   * If set to true, the safety checker will be enabled.
   */
  enable_safety_checker?: boolean;
  /**
   * URL of the end image input.
   */
  end_image_url: string | Blob | File;
  /**
   * Guidance scale for the generation. Default value: `10`
   */
  guidance_scale?: number;
  /**
   * URL of the image input.
   */
  image_url: string | Blob | File;
  /**
   * Negative prompt for video generation. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of frames to generate. Default value: `240`
   */
  num_frames?: number;
  /**
   * Text prompt for video generation (max 500 characters).
   */
  prompt: string;
  /**
   * The resolution of the video to generate. 720p generations cost 1.5x more than 480p generations. Default value: `"480p"`
   */
  resolution?: "720p" | "480p";
  /**
   * The seed to use for generating the video.
   */
  seed?: number;
  /**
   * Determines the influence of the final frame on the generated video. Higher values result in the output being more heavily influenced by the last frame. Default value: `0.8`
   */
  strength?: number;
};

export type framepackInput = {
  /**
   * The aspect ratio of the video to generate. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16";
  /**
   * Classifier-Free Guidance scale for the generation. Default value: `1`
   */
  cfg_scale?: number;
  /**
   * If set to true, the safety checker will be enabled.
   */
  enable_safety_checker?: boolean;
  /**
   * Guidance scale for the generation. Default value: `10`
   */
  guidance_scale?: number;
  /**
   * URL of the image input.
   */
  image_url: string | Blob | File;
  /**
   * Negative prompt for video generation. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of frames to generate. Default value: `180`
   */
  num_frames?: number;
  /**
   * Text prompt for video generation (max 500 characters).
   */
  prompt: string;
  /**
   * The resolution of the video to generate.
720p generations cost 1.5x more than 480p generations. Default value: `"480p"`
   */
  resolution?: "720p" | "480p";
  /**
   * The seed to use for generating the video.
   */
  seed?: number;
};

export type GeminiFlashEditMultiInput = {
  /**
   * List of URLs of input images for editing
   *
   * NOTE(review): the element type was missing in the source; `string` is
   * assumed because the field is documented as a list of URLs — confirm
   * whether `Blob | File` uploads should also be accepted.
   */
  input_image_urls: Array<string>;
  /**
   * The prompt for image generation or editing
   */
  prompt: string;
};

export type GeminiFlashEditOutput = {
  /**
   * Text description or response from Gemini
   */
  description: string;
  /**
   * The generated or edited image
   */
  image: Image;
};

export type GeminiTtsInput = {
  /**
   * Language for multilingual synthesis. When set, steers the model to speak in the specified language. Supports 24 GA languages and 60+ Preview languages. If not set, the model auto-detects the language from the text.
   */
  language_code?:
    | "Arabic (Egypt)"
    | "Bangla (Bangladesh)"
    | "Dutch (Netherlands)"
    | "English (India)"
    | "English (US)"
    | "French (France)"
    | "German (Germany)"
    | "Hindi (India)"
    | "Indonesian (Indonesia)"
    | "Italian (Italy)"
    | "Japanese (Japan)"
    | "Korean (South Korea)"
    | "Marathi (India)"
    | "Polish (Poland)"
    | "Portuguese (Brazil)"
    | "Romanian (Romania)"
    | "Russian (Russia)"
    | "Spanish (Spain)"
    | "Tamil (India)"
    | "Telugu (India)"
    | "Thai (Thailand)"
    | "Turkish (Turkey)"
    | "Ukrainian (Ukraine)"
    | "Vietnamese (Vietnam)"
    | "Afrikaans (South Africa)"
    | "Albanian (Albania)"
    | "Amharic (Ethiopia)"
    | "Arabic (World)"
    | "Armenian (Armenia)"
    | "Azerbaijani (Azerbaijan)"
    | "Basque (Spain)"
    | "Belarusian (Belarus)"
    | "Bulgarian (Bulgaria)"
    | "Burmese (Myanmar)"
    | "Catalan (Spain)"
    | "Cebuano (Philippines)"
    | "Chinese Mandarin (China)"
    | "Chinese Mandarin (Taiwan)"
    | "Croatian (Croatia)"
    | "Czech (Czech Republic)"
    | "Danish (Denmark)"
    | "English (Australia)"
    | "English (UK)"
    | "Estonian (Estonia)"
    | "Filipino (Philippines)"
    | "Finnish (Finland)"
    | "French (Canada)"
    | "Galician (Spain)"
    | "Georgian (Georgia)"
    | "Greek (Greece)"
    | "Gujarati (India)"
    | "Haitian Creole (Haiti)"
    | "Hebrew (Israel)"
    | "Hungarian (Hungary)"
    | "Icelandic (Iceland)"
    | "Javanese (Java)"
    | "Kannada (India)"
    | "Konkani (India)"
    | "Lao (Laos)"
    | "Latin (Vatican City)"
    | "Latvian (Latvia)"
    | "Lithuanian (Lithuania)"
    | "Luxembourgish (Luxembourg)"
    | "Macedonian (North Macedonia)"
    | "Maithili (India)"
    | "Malagasy (Madagascar)"
    | "Malay (Malaysia)"
    | "Malayalam (India)"
    | "Mongolian (Mongolia)"
    | "Nepali (Nepal)"
    | "Norwegian Bokmal (Norway)"
    | "Norwegian Nynorsk (Norway)"
    | "Odia (India)"
    | "Pashto (Afghanistan)"
    | "Persian (Iran)"
    | "Portuguese (Portugal)"
    | "Punjabi (India)"
    | "Serbian (Serbia)"
    | "Sindhi (India)"
    | "Sinhala (Sri Lanka)"
    | "Slovak (Slovakia)"
    | "Slovenian (Slovenia)"
    | "Spanish (Latin America)"
    | "Spanish (Mexico)"
    | "Swahili (Kenya)"
    | "Swedish (Sweden)"
    | "Urdu (Pakistan)";
  /**
   * Which Gemini TTS model to use. gemini-2.5-flash-tts: low latency, cost-efficient for everyday applications (recommended). gemini-2.5-pro-tts: highest quality, best for structured workflows like podcasts, audiobooks, and customer support. Default value: `"gemini-2.5-flash-tts"`
   */
  model?: "gemini-2.5-flash-tts" | "gemini-2.5-pro-tts";
  /**
   * Audio output format. mp3: compressed, small file size (recommended). wav: uncompressed PCM wrapped in WAV (24 kHz, 16-bit mono). ogg_opus: Ogg container with Opus codec, good quality-to-size ratio. Default value: `"mp3"`
   */
  output_format?: "wav" | "mp3" | "ogg_opus";
  /**
   * The text to convert to speech. Gemini TTS supports natural-language prompting for style, pace, accent, and emotional expression — include delivery instructions inline with the text (e.g. 'Say cheerfully: Have a wonderful day!'). For multi-speaker synthesis, prefix lines with speaker aliases defined in the speakers field (e.g. 'Alice: Hello!
   * Bob: Hi!'). Supports inline pace/style markers like [slowly], [whispering], [excited], [extremely fast].
   */
  prompt: string;
  /**
   * Multi-speaker voice configuration. When set, enables multi-speaker synthesis where different parts of the text are spoken by different voices. Each speaker needs a voice and a speaker_id (alias) that matches prefixes in the prompt. Requires gemini-2.5-pro-tts or gemini-2.5-flash-tts model. Not supported with gemini-2.5-flash-lite-preview-tts.
   *
   * NOTE(review): the element type was missing in the source; typed as
   * `unknown` until the concrete speaker configuration type is confirmed.
   */
  speakers?: Array<unknown>;
  /**
   * Optional style and delivery instructions prepended to the prompt. Controls expressiveness, accent, pace, tone, and emotional expression using natural language. Use this to separate style control from the text content. Examples: 'Speak warmly and slowly', 'Read this as a dramatic newscast', 'Use a British accent with a cheerful tone', 'Whisper mysteriously'.
   */
  style_instructions?: string;
  /**
   * Controls the randomness of the speech output. Higher values produce more creative and varied delivery, while lower values make the output more predictable and focused. Default value: `1`
   */
  temperature?: number;
  /**
   * Voice preset for single-speaker synthesis. 30 distinct voices are available. Ignored when speakers is set. Popular choices: Kore (strong, firm female), Puck (upbeat, lively male), Charon (calm, professional male), Zephyr (bright, clear female), Aoede (warm, melodic female). Default value: `"Kore"`
   */
  voice?:
    | "Achernar"
    | "Achird"
    | "Algenib"
    | "Algieba"
    | "Alnilam"
    | "Aoede"
    | "Autonoe"
    | "Callirrhoe"
    | "Charon"
    | "Despina"
    | "Enceladus"
    | "Erinome"
    | "Fenrir"
    | "Gacrux"
    | "Iapetus"
    | "Kore"
    | "Laomedeia"
    | "Leda"
    | "Orus"
    | "Pulcherrima"
    | "Puck"
    | "Rasalgethi"
    | "Sadachbia"
    | "Sadaltager"
    | "Schedar"
    | "Sulafat"
    | "Umbriel"
    | "Vindemiatrix"
    | "Zephyr"
    | "Zubenelgenubi";
};

export type GeneralRembgInput = {
  /**
   * Single VP9 video with alpha channel or two videos (rgb and alpha) in H264 format. H264 is recommended for better RGB quality. Default value: `"vp9"`
   */
  output_codec?: "vp9" | "h264";
  /**
   * Improves the quality of the extracted object's edges.
Default value: `true`
   */
  refine_foreground_edges?: boolean;
  /**
   * Set to False if the subject is not a person. Default value: `true`
   */
  subject_is_person?: boolean;
  /**
   *
   */
  video_url: string | Blob | File;
};

export type GeneralRembgOutput = {
  /**
   * NOTE(review): the element type was missing in the source; typed as
   * `unknown` until the concrete video file type is confirmed.
   */
  video: Array<unknown>;
};

export type GenerateInput = {
  /**
   * Greater means more natural vocals. Lower means sharper instrumentals. We recommend 0.7. Default value: `0.7`
   */
  balance_strength?: number;
  /**
   * The beats per minute of the song. This can be set to an integer or the literal string "auto" to pick a suitable bpm based on the tags. Set bpm to null to not condition the model on bpm information. Default value: `auto`
   */
  bpm?: number | string;
  /**
   * The lyrics sung in the generated song. An empty string will generate an instrumental track.
   */
  lyrics_prompt?: string;
  /**
   * Generating 2 songs costs 1.5x the price of generating 1 song. Also, note that using the same seed may not result in identical songs if the number of songs generated is changed. Default value: `1`
   */
  num_songs?: number;
  /**
   * The bit rate to use for mp3 and m4a formats. Not available for other formats.
   */
  output_bit_rate?: "128" | "192" | "256" | "320";
  /**
   * Default value: `"wav"`
   */
  output_format?: "flac" | "mp3" | "wav" | "ogg" | "m4a";
  /**
   * A description of the track you want to generate. This prompt will be used to automatically generate the tags and lyrics unless you manually set them. For example, if you set prompt and tags, then the prompt will be used to generate only the lyrics.
   */
  prompt?: string;
  /**
   * Controls how strongly your prompt influences the output. Greater values adhere more to the prompt but sound less natural. (This is CFG.) Default value: `2`
   */
  prompt_strength?: number;
  /**
   * The seed to use for generation. Will pick a random seed if not provided. Repeating a request with identical parameters (must use lyrics and tags, not prompt) and the same seed will generate the same song.
   */
  seed?: number;
  /**
   * Tags/styles of the music to generate. You can view a list of all available tags at https://sonauto.ai/tag-explorer.
   *
   * NOTE(review): the element type was missing in the source; `string` is
   * assumed for tag names — confirm.
   */
  tags?: Array<string>;
};

export type GenerateOutput = {
  /**
   * The generated audio files.
   *
   * NOTE(review): the element type was missing in the source; typed as
   * `unknown` until the concrete audio file type is confirmed.
   */
  audio: Array<unknown>;
  /**
   * The lyrics used for generation.
   */
  lyrics?: string;
  /**
   * The seed used for generation. This can be used to generate an identical song by passing the same parameters with this seed in a future request.
   */
  seed: number;
  /**
   * The style tags used for generation.
   *
   * NOTE(review): the element type was missing in the source; `string` is
   * assumed for tag names — confirm.
   */
  tags?: Array<string>;
};

export type GenFillInput = {
  /**
   * Input Image to erase from
   */
  image_url: string | Blob | File;
  /**
   * The URL of the binary mask image that represents the area that will be cleaned.
   */
  mask_url: string | Blob | File;
  /**
   * The negative prompt you would like to use to generate images. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Number of Images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The prompt you would like to use to generate images.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

export type ghiblifyInput = {
  /**
   * Whether to enable the safety checker. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The URL of the image to upscale.
   */
  image_url: string | Blob | File;
  /**
   * The seed to use for the upscale. If not provided, a random seed will be used.
   */
  seed?: number;
};

export type GifToVideoInput = {
  /**
   * URL of the GIF file to convert
   */
  gif_url: string | Blob | File;
  /**
   * Output video format. 'webm' (VP9) supports transparency, 'mp4' (H.264) is more widely compatible but does not support transparency. Default value: `"webm"`
   */
  output_format?: "webm" | "mp4";
  /**
   * Preserve GIF transparency as alpha channel. Only works with webm format.
When False or when using mp4, transparent areas become black. Default value: `true`
   */
  preserve_transparency?: boolean;
  /**
   * Quality level for the output video. Low=smaller file, lossless=best quality. 'lossless' only available for webm. Default value: `"medium"`
   */
  quality?: "low" | "medium" | "high" | "lossless";
};

export type GlmImageInput = {
  /**
   * If True, the prompt will be enhanced using an LLM for more detailed and higher quality results.
   */
  enable_prompt_expansion?: boolean;
  /**
   * Enable NSFW safety checking on the generated images. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Classifier-free guidance scale. Higher values make the model follow the prompt more closely. Default value: `1.5`
   */
  guidance_scale?: number;
  /**
   * Output image size. Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9"
    | "portrait_3_2"
    | "landscape_3_2"
    | "portrait_hd"
    | "landscape_hd";
  /**
   * Number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * Number of diffusion denoising steps. More steps generally produce higher quality images. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * Output image format. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * Text prompt for image generation.
   */
  prompt: string;
  /**
   * Random seed for reproducibility. The same seed with the same prompt will produce the same image.
   */
  seed?: number;
  /**
   * If True, the image will be returned as a base64 data URI instead of a URL.
   */
  sync_mode?: boolean;
};

export type GlmImageToImageInput = {
  /**
   * If True, the prompt will be enhanced using an LLM for more detailed and higher quality results.
   */
  enable_prompt_expansion?: boolean;
  /**
   * Enable NSFW safety checking on the generated images. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Classifier-free guidance scale. Higher values make the model follow the prompt more closely. Default value: `1.5`
   */
  guidance_scale?: number;
  /**
   * Output image size. Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9"
    | "portrait_3_2"
    | "landscape_3_2"
    | "portrait_hd"
    | "landscape_hd";
  /**
   * URL(s) of the condition image(s) for image-to-image generation. Supports up to 4 URLs for multi-image references.
   *
   * NOTE(review): the element type was missing in the source; `string` is
   * assumed because the field is documented as URLs — confirm whether
   * `Blob | File` uploads should also be accepted.
   */
  image_urls: Array<string>;
  /**
   * Number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * Number of diffusion denoising steps. More steps generally produce higher quality images. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * Output image format. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * Text prompt for image generation.
   */
  prompt: string;
  /**
   * Random seed for reproducibility. The same seed with the same prompt will produce the same image.
   */
  seed?: number;
  /**
   * If True, the image will be returned as a base64 data URI instead of a URL.
   */
  sync_mode?: boolean;
};

export type GlowInput = {
  /**
   * Glow intensity Default value: `1`
   */
  glow_intensity?: number;
  /**
   * Glow blur radius Default value: `5`
   */
  glow_radius?: number;
  /**
   * URL of image to process
   */
  image_url: string | Blob | File;
};

export type GoalForceInput = {
  /**
   * Enable safety checker for generated content. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Indirect goal force applied to a target object. The model generates physics-plausible interactions to move the target in the specified direction. Specify either this, projectile_force, or both.
   */
  goal_force?: ForceParams;
  /**
   * Classifier-free guidance scale. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * URL of the input image (first frame). The image will be resized to 832x480. Default value: `"https://raw.githubusercontent.com/brown-palm/goal-force/2d0209dc27333bffdb3f06df657ae05b11407615/datasets/examples/human-object-interaction/images/_bulb_tool.png"`
   */
  image_url?: string | Blob | File;
  /**
   * Negative prompt for generation. Default value: `"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"`
   */
  negative_prompt?: string;
  /**
   * Number of denoising steps. Higher values produce better quality but take longer. Default value: `50`
   */
  num_inference_steps?: number;
  /**
   * Direct force applied to a projectile object, causing it to move along the force direction. Specify either this, goal_force, or both.
   */
  projectile_force?: ForceParams;
  /**
   * Text description of the scene and the desired physics interaction. Default value: `"A hand pushes the hanging bulb, causing it to sway."`
   */
  prompt?: string;
  /**
   * Random seed for reproducibility.
   */
  seed?: number;
};

export type GoalForceOutput = {
  /**
   * The seed used for generation.
   */
  seed: number;
  /**
   * Timing information.
   */
  timings: unknown;
  /**
   * The generated video.
   */
  video: VideoFile;
};

export type GotOcrV2Input = {
  /**
   * Generate the output in formatted mode.
   */
  do_format?: boolean;
  /**
   * URL of images.
   *
   * NOTE(review): the element type was missing in the source; `string` is
   * assumed because the field is documented as URLs — confirm whether
   * `Blob | File` uploads should also be accepted.
   */
  input_image_urls?: Array<string>;
  /**
   * Use provided images to generate a single output.
   */
  multi_page?: boolean;
};

export type GotOcrV2Output = {
  /**
   * Generated output
   *
   * NOTE(review): the element type was missing in the source; typed as
   * `unknown` until the concrete output element type is confirmed.
   */
  outputs: Array<unknown>;
};

export type GptImage15EditInput = {
  /**
   * Background for the generated image Default value: `"auto"`
   */
  background?: "auto" | "transparent" | "opaque";
  /**
   * Aspect ratio for the generated image Default value: `"auto"`
   */
  image_size?: "auto" | "1024x1024" | "1536x1024" | "1024x1536";
  /**
   * The URLs of the images to use as a reference for the generation.
*/ image_urls: Array; /** * Input fidelity for the generated image Default value: `"high"` */ input_fidelity?: "low" | "high"; /** * The URL of the mask image to use for the generation. This indicates what part of the image to edit. */ mask_image_url?: string | Blob | File; /** * Number of images to generate Default value: `1` */ num_images?: number; /** * Output format for the images Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt for image generation */ prompt: string; /** * Quality for the generated image Default value: `"high"` */ quality?: "low" | "medium" | "high"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type GptImage15Input = { /** * Background for the generated image Default value: `"auto"` */ background?: "auto" | "transparent" | "opaque"; /** * Aspect ratio for the generated image Default value: `"1024x1024"` */ image_size?: "1024x1024" | "1536x1024" | "1024x1536"; /** * Number of images to generate Default value: `1` */ num_images?: number; /** * Output format for the images Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt for image generation */ prompt: string; /** * Quality for the generated image Default value: `"high"` */ quality?: "low" | "medium" | "high"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type GptImage1EditImageInput = { /** * Background for the generated image Default value: `"auto"` */ background?: "auto" | "transparent" | "opaque"; /** * Aspect ratio for the generated image Default value: `"auto"` */ image_size?: "auto" | "1024x1024" | "1536x1024" | "1024x1536"; /** * The URLs of the images to use as a reference for the generation. 
*/ image_urls: Array; /** * Input fidelity for the generated image Default value: `"high"` */ input_fidelity?: "low" | "high"; /** * Number of images to generate Default value: `1` */ num_images?: number; /** * Output format for the images Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt for image generation */ prompt: string; /** * Quality for the generated image Default value: `"auto"` */ quality?: "auto" | "low" | "medium" | "high"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type GptImage1MiniEditInput = { /** * Background for the generated image Default value: `"auto"` */ background?: "auto" | "transparent" | "opaque"; /** * Aspect ratio for the generated image Default value: `"auto"` */ image_size?: "auto" | "1024x1024" | "1536x1024" | "1024x1536"; /** * The URLs of the images to use as a reference for the generation. */ image_urls: Array; /** * Number of images to generate Default value: `1` */ num_images?: number; /** * Output format for the images Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt for image generation */ prompt: string; /** * Quality for the generated image Default value: `"auto"` */ quality?: "auto" | "low" | "medium" | "high"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; }; export type GrainInput = { /** * Film grain intensity Default value: `0.4` */ grain_intensity?: number; /** * Film grain scale Default value: `10` */ grain_scale?: number; /** * Style of film grain to apply Default value: `"modern"` */ grain_style?: "modern" | "analog" | "kodak" | "fuji" | "cinematic" | "newspaper"; /** * URL of image to process */ image_url: string | Blob | File; }; export type GreenScreenRembgInput = { /** * Single VP9 video with alpha channel or two videos (rgb and alpha) in H264 format. H264 is recommended for better RGB quality. Default value: `"vp9"` */ output_codec?: "vp9" | "h264"; /** * Increase the value if green spots remain in the video, decrease if color changes are noticed on the extracted subject. Default value: `0.8` */ spill_suppression_strength?: number; /** * */ video_url: string | Blob | File; }; export type GuidanceInput = { /** * The image that should be used as guidance, in base64 format, with the method defined in guidance_method_1. Accepted formats are jpeg, jpg, png, webp. Maximum file size 12MB. If more then one guidance method is used, all guidance images must be of the same aspect ratio, and this will be the aspect ratio of the generated results. If guidance_method_1 is selected, an image must be provided. */ image_url: string | Blob | File; /** * Which guidance type you would like to include in the generation. Up to 4 guidance methods can be combined during a single inference. This parameter is optional. */ method?: "controlnet_canny" | "controlnet_depth" | "controlnet_recoloring" | "controlnet_color_grid"; /** * Impact of the guidance. Default value: `1` */ scale?: number; }; export type HdrStyleInput = { /** * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"regular"` */ acceleration?: "none" | "regular"; /** * Whether to enable the safety checker for the generated image. 
Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale. Controls how closely the model follows the prompt. Default value: `2.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The strength of the HDR style effect. Default value: `1` */ lora_scale?: number; /** * Number of images to generate Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `40` */ num_inference_steps?: number; /** * The format of the output image Default value: `"png"` */ output_format?: "png" | "jpeg" | "webp"; /** * The prompt to generate an HDR style image. The trigger word 'Hyp3rRe4list1c' will be automatically prepended. */ prompt: string; /** * Random seed for reproducibility. Same seed with same prompt will produce same result. */ seed?: number; /** * If `True`, the media will be returned as a data URI and won't be saved in history. */ sync_mode?: boolean; }; export type HeadshotInput = { /** * Aspect ratio for 4K output (default: 3:4 for portraits) */ aspect_ratio?: AspectRatio; /** * Default value: `"professional"` */ background_style?: "professional" | "corporate" | "clean" | "gradient"; /** * Portrait image URL to convert to professional headshot */ image_url: string | Blob | File; }; export type HEDInput = { /** * URL of the image to process */ image_url: string | Blob | File; /** * Whether to use the safe version of the HED detector */ safe?: boolean; /** * Whether to use the scribble version of the HED detector */ scribble?: boolean; }; export type HEDOutput = { /** * Image with lines detected using the HED detector */ image: Image; }; export type HeygenAvatar3DigitalTwinInput = { /** * Aspect ratio of the output video. Use '9:16' for portrait (vertical) videos, '16:9' for landscape, or '1:1' for square. 
Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "1:1"; /** * URL of an audio file for the avatar to lip-sync to. When provided, the avatar uses this audio instead of text-to-speech. */ audio_url?: string | Blob | File; /** * Character configuration for the video */ character: Character; /** * Video resolution preset. Options: 360p, 480p, 540p, 720p, 1080p Default value: `"720p"` */ resolution?: "360p" | "480p" | "540p" | "720p" | "1080p"; /** * Voice configuration for the character */ voice: TextVoice; }; export type HeygenAvatar4ImageToVideoInput = { /** * Aspect ratio of the output video. Use '9:16' for portrait (vertical) videos, '16:9' for landscape, or '1:1' for square. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "1:1"; /** * URL of an audio file for the avatar to lip-sync to. When provided, overrides prompt and voice. */ audio_url?: string | Blob | File; /** * Background configuration */ background?: AvatarIVBackground; /** * Whether to add captions to the video */ caption?: boolean; /** * Facial expression */ expression?: string; /** * URL of the image to animate. The image should contain a clear face. */ image_url: string | Blob | File; /** * The text the avatar will speak */ prompt?: string; /** * Video resolution preset. 
Options: 360p, 480p, 540p, 720p, 1080p Default value: `"720p"` */ resolution?: "360p" | "480p" | "540p" | "720p" | "1080p"; /** * Talking style - 'stable' for minimal movement, 'expressive' for more animation Default value: `"stable"` */ talking_style?: "stable" | "expressive"; /** * Name of the voice to use for the avatar */ voice?: string; }; export type HeygenV2TranslateSpeedInput = { /** * Enable dynamic duration to enhance conversational fluidity between languages with different speaking rates Default value: `true` */ enable_dynamic_duration?: boolean; /** * The target language to translate the video into */ output_language: "English" | "Spanish" | "French" | "Hindi" | "Italian" | "German" | "Polish" | "Portuguese" | "Chinese" | "Japanese" | "Dutch" | "Turkish" | "Korean" | "Danish" | "Arabic" | "Romanian" | "Mandarin" | "Filipino" | "Swedish" | "Indonesian" | "Ukrainian" | "Greek" | "Czech" | "Bulgarian" | "Malay" | "Slovak" | "Croatian" | "Tamil" | "Finnish" | "Russian" | "Afrikaans (South Africa)" | "Albanian (Albania)" | "Amharic (Ethiopia)" | "Arabic (Algeria)" | "Arabic (Bahrain)" | "Arabic (Egypt)" | "Arabic (Iraq)" | "Arabic (Jordan)" | "Arabic (Kuwait)" | "Arabic (Lebanon)" | "Arabic (Libya)" | "Arabic (Morocco)" | "Arabic (Oman)" | "Arabic (Qatar)" | "Arabic (Saudi Arabia)" | "Arabic (Syria)" | "Arabic (Tunisia)" | "Arabic (United Arab Emirates)" | "Arabic (Yemen)" | "Armenian (Armenia)" | "Azerbaijani (Latin, Azerbaijan)" | "Bangla (Bangladesh)" | "Basque" | "Bengali (India)" | "Bosnian (Bosnia and Herzegovina)" | "Bulgarian (Bulgaria)" | "Burmese (Myanmar)" | "Catalan" | "Chinese (Cantonese, Traditional)" | "Chinese (Jilu Mandarin, Simplified)" | "Chinese (Mandarin, Simplified)" | "Chinese (Northeastern Mandarin, Simplified)" | "Chinese (Southwestern Mandarin, Simplified)" | "Chinese (Taiwanese Mandarin, Traditional)" | "Chinese (Wu, Simplified)" | "Chinese (Zhongyuan Mandarin Henan, Simplified)" | "Chinese (Zhongyuan Mandarin Shaanxi, Simplified)" 
| "Croatian (Croatia)" | "Czech (Czechia)" | "Danish (Denmark)" | "Dutch (Belgium)" | "Dutch (Netherlands)" | "English (Australia)" | "English (Canada)" | "English (Hong Kong SAR)" | "English (India)" | "English (Ireland)" | "English (Kenya)" | "English (New Zealand)" | "English (Nigeria)" | "English (Philippines)" | "English (Singapore)" | "English (South Africa)" | "English (Tanzania)" | "English (UK)" | "English (United States)" | "Estonian (Estonia)" | "Filipino (Philippines)" | "Finnish (Finland)" | "French (Belgium)" | "French (Canada)" | "French (France)" | "French (Switzerland)" | "Galician" | "Georgian (Georgia)" | "German (Austria)" | "German (Germany)" | "German (Switzerland)" | "Greek (Greece)" | "Gujarati (India)" | "Hebrew (Israel)" | "Hindi (India)" | "Hungarian (Hungary)" | "Icelandic (Iceland)" | "Indonesian (Indonesia)" | "Irish (Ireland)" | "Italian (Italy)" | "Japanese (Japan)" | "Javanese (Latin, Indonesia)" | "Kannada (India)" | "Kazakh (Kazakhstan)" | "Khmer (Cambodia)" | "Korean (Korea)" | "Lao (Laos)" | "Latvian (Latvia)" | "Lithuanian (Lithuania)" | "Macedonian (North Macedonia)" | "Malay (Malaysia)" | "Malayalam (India)" | "Maltese (Malta)" | "Marathi (India)" | "Mongolian (Mongolia)" | "Nepali (Nepal)" | "Norwegian Bokmål (Norway)" | "Pashto (Afghanistan)" | "Persian (Iran)" | "Polish (Poland)" | "Portuguese (Brazil)" | "Portuguese (Portugal)" | "Romanian (Romania)" | "Russian (Russia)" | "Serbian (Latin, Serbia)" | "Sinhala (Sri Lanka)" | "Slovak (Slovakia)" | "Slovenian (Slovenia)" | "Somali (Somalia)" | "Spanish (Argentina)" | "Spanish (Bolivia)" | "Spanish (Chile)" | "Spanish (Colombia)" | "Spanish (Costa Rica)" | "Spanish (Cuba)" | "Spanish (Dominican Republic)" | "Spanish (Ecuador)" | "Spanish (El Salvador)" | "Spanish (Equatorial Guinea)" | "Spanish (Guatemala)" | "Spanish (Honduras)" | "Spanish (Mexico)" | "Spanish (Nicaragua)" | "Spanish (Panama)" | "Spanish (Paraguay)" | "Spanish (Peru)" | "Spanish (Puerto Rico)" | "Spanish 
(Spain)" | "Spanish (United States)" | "Spanish (Uruguay)" | "Spanish (Venezuela)" | "Sundanese (Indonesia)" | "Swahili (Kenya)" | "Swahili (Tanzania)" | "Swedish (Sweden)" | "Tamil (India)" | "Tamil (Malaysia)" | "Tamil (Singapore)" | "Tamil (Sri Lanka)" | "Telugu (India)" | "Thai (Thailand)" | "Turkish (Türkiye)" | "Ukrainian (Ukraine)" | "Urdu (India)" | "Urdu (Pakistan)" | "Uzbek (Latin, Uzbekistan)" | "Vietnamese (Vietnam)" | "Welsh (United Kingdom)" | "Zulu (South Africa)" | "English - Your Accent" | "English - American Accent"; /** * Number of speakers in the video */ speaker_num?: number; /** * Translate only the audio, ignore the faces and only translate the voice track */ translate_audio_only?: boolean; /** * URL of the video to translate. */ video_url: string | Blob | File; }; export type HeygenV2VideoAgentInput = { /** * Video configuration options */ config?: VideoAgentConfig; /** * Natural language prompt describing the video to generate. Include details about style, visual elements, and desired length for best results. */ prompt: string; }; export type HidreamE11Input = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your initial image when looking for a related image to show you. Default value: `2` */ image_guidance_scale?: number; /** * URL of an input image to edit. */ image_url: string | Blob | File; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). 
Default value: `"low resolution, blur"` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `50` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt?: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The description of the target image after your edits have been made. Leave this blank to allow the model to use its own imagination. */ target_image_description?: string; }; export type HidreamI1DevInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. 
*/ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type HidreamI1FullImageToImageInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `5` */ guidance_scale?: number; /** * The size of the generated image. Setting to None uses the input image's size. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The image URL to generate an image from. */ image_url: string | Blob | File; /** * A list of LoRAs to apply to the model. Each LoRA specifies its path, scale, and optional weight name. */ loras?: Array; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `50` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Denoising strength for image-to-image generation. Default value: `0.75` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; }; export type HidreamI1FullInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `5` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * A list of LoRAs to apply to the model. Each LoRA specifies its path, scale, and optional weight name. */ loras?: Array; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `50` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type Hunyuan3DInput = { /** * Guidance scale for the model. Default value: `7.5` */ guidance_scale?: number; /** * URL of image to use while generating the 3D model. */ input_image_url: string | Blob | File; /** * Number of inference steps to perform. Default value: `50` */ num_inference_steps?: number; /** * Octree resolution for the model. 
Default value: `256` */ octree_resolution?: number; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If set true, textured mesh will be generated and the price charged would be 3 times that of white mesh. */ textured_mesh?: boolean; }; export type Hunyuan3dV21Output = { /** * Generated 3D object. */ model_glb: File; /** * Generated 3D object with PBR materials. */ model_glb_pbr?: File; /** * Generated 3D object assets zip. */ model_mesh: File; /** * Seed value used for generation. */ seed: number; }; export type Hunyuan3dV2MultiViewInput = { /** * URL of image to use while generating the 3D model. */ back_image_url: string | Blob | File; /** * URL of image to use while generating the 3D model. */ front_image_url: string | Blob | File; /** * Guidance scale for the model. Default value: `7.5` */ guidance_scale?: number; /** * URL of image to use while generating the 3D model. */ left_image_url: string | Blob | File; /** * Number of inference steps to perform. Default value: `50` */ num_inference_steps?: number; /** * Octree resolution for the model. Default value: `256` */ octree_resolution?: number; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If set true, textured mesh will be generated and the price charged would be 3 times that of white mesh. */ textured_mesh?: boolean; }; export type Hunyuan3dV2Output = { /** * Generated 3D object file. */ model_mesh: File; /** * Seed value used for generation. */ seed: number; }; export type Hunyuan3dV3ImageTo3dInput = { /** * Optional back view image URL for better 3D reconstruction. */ back_image_url?: string | Blob | File; /** * Whether to enable PBR material generation. Does not take effect when generate_type is Geometry. */ enable_pbr?: boolean; /** * Target face count. 
Range: 40000-1500000 Default value: `500000` */ face_count?: number; /** * Generation type. Normal: textured model. LowPoly: polygon reduction. Geometry: white model without texture. Default value: `"Normal"` */ generate_type?: "Normal" | "LowPoly" | "Geometry"; /** * URL of image to use while generating the 3D model. */ input_image_url: string | Blob | File; /** * Optional left view image URL for better 3D reconstruction. */ left_image_url?: string | Blob | File; /** * Polygon type. Only takes effect when GenerateType is LowPoly. Default value: `"triangle"` */ polygon_type?: "triangle" | "quadrilateral"; /** * Optional right view image URL for better 3D reconstruction. */ right_image_url?: string | Blob | File; }; export type Hunyuan3dV3TextTo3dInput = { /** * Whether to enable PBR material generation */ enable_pbr?: boolean; /** * Target face count. Range: 40000-1500000 Default value: `500000` */ face_count?: number; /** * Generation type. Normal: textured model. LowPoly: polygon reduction. Geometry: white model without texture. Default value: `"Normal"` */ generate_type?: "Normal" | "LowPoly" | "Geometry"; /** * Polygon type. Only takes effect when GenerateType is LowPoly. Default value: `"triangle"` */ polygon_type?: "triangle" | "quadrilateral"; /** * Text description of the 3D content to generate. Supports up to 1024 UTF-8 characters. */ prompt: string; }; export type HunyuanAvatarInput = { /** * The URL of the audio file. */ audio_url: string | Blob | File; /** * The URL of the reference image. */ image_url: string | Blob | File; /** * Number of video frames to generate at 25 FPS. If greater than the input audio length, it will capped to the length of the input audio. Default value: `129` */ num_frames?: number; /** * Number of inference steps for sampling. Higher values give better quality but take longer. Default value: `30` */ num_inference_steps?: number; /** * Random seed for generation. */ seed?: number; /** * Text prompt describing the scene. 
Default value: `"A cat is singing."` */ text?: string; /** * If true, the video will be generated faster with no noticeable degradation in the visual quality. Default value: `true` */ turbo_mode?: boolean; }; export type HunyuanCustomInput = { /** * The aspect ratio of the video to generate. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16"; /** * Classifier-Free Guidance scale for the generation. Default value: `7.5` */ cfg_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * URL of the image input. */ image_url: string | Blob | File; /** * Negative prompt for video generation. Default value: `"Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion, blurring, text, subtitles, static, picture, black border."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `129` */ num_frames?: number; /** * The number of inference steps to run. Lower gets faster results, higher gets better results. Default value: `30` */ num_inference_steps?: number; /** * Text prompt for video generation (max 500 characters). */ prompt: string; /** * The resolution of the video to generate. 720p generations cost 1.5x more than 480p generations. Default value: `"512p"` */ resolution?: "512p" | "720p"; /** * The seed to use for generating the video. */ seed?: number; }; export type HunyuanImageToImageInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Controls how much the model adheres to the prompt. Higher values mean stricter adherence. 
Default value: `3.5` */ guidance_scale?: number; /** * The desired size of the generated image. By default, attempts to preserve the original size. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of the Image for Image-to-Image */ image_url: string | Blob | File; /** * The negative prompt to guide the image generation away from certain concepts. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * Number of denoising steps. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The text prompt for image-to-image. */ prompt: string; /** * Random seed for reproducible results. If None, a random seed is used. */ seed?: number; /** * The strength to use for image-to-image. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.6` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Enable the refiner model for improved image quality. */ use_refiner?: boolean; /** * Enable prompt enhancement for potentially better results. Default value: `true` */ use_reprompt?: boolean; }; export type HunyuanImageV3InstructEditInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Controls how much the model adheres to the prompt. Higher values mean stricter adherence. Default value: `3.5` */ guidance_scale?: number; /** * The desired size of the generated image. If auto, image size will be determined by the model. 
Default value: `auto` */ image_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URLs of the images to use as a reference for the generation. A maximum of 3 images are supported. */ image_urls: Array; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The text prompt to generate an image from. */ prompt: string; /** * Random seed for reproducible results. If None, a random seed is used. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type HunyuanImageV3InstructTextToImageInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Controls how much the model adheres to the prompt. Higher values mean stricter adherence. Default value: `3.5` */ guidance_scale?: number; /** * The desired size of the generated image. If auto, image size will be determined by the model. Default value: `auto` */ image_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The text prompt to generate an image from. */ prompt: string; /** * Random seed for reproducible results. If None, a random seed is used. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type HunyuanImageV3TextToImageInput = { /** * Whether to enable prompt expansion. 
This will use a large language model to expand the prompt with additional details while maintaining the original meaning. */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Controls how much the model adheres to the prompt. Higher values mean stricter adherence. Default value: `7.5` */ guidance_scale?: number; /** * The desired size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The negative prompt to guide the image generation away from certain concepts. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * Number of denoising steps. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The text prompt to generate an image from. */ prompt: string; /** * Random seed for reproducible results. If None, a random seed is used. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Fields for generating a motion from a text prompt; only `prompt` is required. */ export type HunyuanMotionInput = { /** * Motion duration in seconds (0.5-12.0). Default value: `5` */ duration?: number; /** * Classifier-free guidance scale. Higher = more faithful to prompt. Default value: `5` */ guidance_scale?: number; /** * Output format: 'fbx' for animation files, 'dict' for raw JSON. Default value: `"fbx"` */ output_format?: "fbx" | "dict"; /** * Text prompt describing the motion to generate. */ prompt: string; /** * Random seed for reproducible generation. */ seed?: number; }; /** * Motion generation result: optional FBX file and optional JSON motion data, plus the seed used. */ export type HunyuanMotionOutput = { /** * Generated FBX animation file. */ fbx_file?: File; /** * Generated motion data as JSON.
*/ motion_json?: File; /** * Seed used for generation. */ seed: number; }; /** * Fields pairing a source image with a driving video; `image_url` and `video_url` are required. */ export type HunyuanPortraitInput = { /** * The URL of the source image. */ image_url: string | Blob | File; /** * Random seed for generation. If None, a random seed will be used. */ seed?: number; /** * Whether to use ArcFace for face recognition. Default value: `true` */ use_arcface?: boolean; /** * The URL of the driving video. */ video_url: string | Blob | File; }; /** * Text-to-image request fields; only `prompt` is required. */ export type HunyuanTextToImageInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Controls how much the model adheres to the prompt. Higher values mean stricter adherence. Default value: `3.5` */ guidance_scale?: number; /** * The desired size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The negative prompt to guide the image generation away from certain concepts. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * Number of denoising steps. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The text prompt to generate an image from. */ prompt: string; /** * Random seed for reproducible results. If None, a random seed is used. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Enable the refiner model for improved image quality. */ use_refiner?: boolean; /** * Enable prompt enhancement for potentially better results. Default value: `true` */ use_reprompt?: boolean; }; /** * Fields for generating audio for a video; `text_prompt` and `video_url` are declared required. */ export type HunyuanVideoFoleyInput = { /** * Guidance scale for audio generation.
Default value: `4.5` */ guidance_scale?: number; /** * Negative prompt to avoid certain audio characteristics. Default value: `"noisy, harsh"` */ negative_prompt?: string; /** * Number of inference steps for generation. Default value: `50` */ num_inference_steps?: number; /** * Random seed for reproducible generation. */ seed?: number; /** * Text description of the desired audio. NOTE(review): the upstream text says "(optional)" but the property is declared required here; confirm which is intended. */ text_prompt: string; /** * The URL of the video to generate audio for. */ video_url: string | Blob | File; }; /** * Image-to-video request fields; `image_url` and `prompt` are required. */ export type HunyuanVideoImageToVideoInput = { /** * The aspect ratio of the video to generate. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16"; /** * Turning on I2V Stability reduces hallucination but also reduces motion. */ i2v_stability?: boolean; /** * URL of the image input. */ image_url: string | Blob | File; /** * The number of frames to generate. Default value: `"129"` */ num_frames?: string; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the video to generate. Default value: `"720p"` */ resolution?: string; /** * The seed to use for generating the video. */ seed?: number; }; /** * Video generation fields with optional LoRAs; only `prompt` is required. */ export type HunyuanVideoLoraInput = { /** * The aspect ratio of the video to generate. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16"; /** * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. NOTE(review): the LoRA entry type is not visible in this chunk; typed as `unknown` until confirmed. */ loras?: Array<unknown>; /** * The number of frames to generate. Default value: `"129"` */ num_frames?: "129" | "85"; /** * By default, generations are done with 35 steps. Pro mode does 55 steps which results in higher quality videos but will take more time and cost 2x more billing units. */ pro_mode?: boolean; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the video to generate.
Default value: `"720p"` */ resolution?: "480p" | "580p" | "720p"; /** * The seed to use for generating the video. */ seed?: number; }; /** * LoRA training fields; `images_data_url` and `steps` are required. */ export type HunyuanVideoLoraTrainingInput = { /** * The format of the archive. If not specified, the format will be inferred from the URL. */ data_archive_format?: string; /** * Whether to generate captions for the images. Default value: `true` */ do_caption?: boolean; /** * URL to zip archive with images. Try to use at least 4 images; in general, the more the better. * * In addition to images the archive can contain text files with captions. Each text file should have the same name as the image file it corresponds to. */ images_data_url: string | Blob | File; /** * Learning rate to use for training. Default value: `0.0001` */ learning_rate?: number; /** * Number of steps to train the LoRA on. */ steps: number; /** * The trigger word to use. Default value: `""` */ trigger_word?: string; }; /** * Video-to-video fields with optional LoRAs; `prompt` and `video_url` are required. */ export type HunyuanVideoLoraVideoToVideoInput = { /** * The aspect ratio of the video to generate. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16"; /** * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. NOTE(review): the LoRA entry type is not visible in this chunk; typed as `unknown` until confirmed. */ loras?: Array<unknown>; /** * The number of frames to generate. Default value: `"129"` */ num_frames?: "129" | "85"; /** * By default, generations are done with 35 steps. Pro mode does 55 steps which results in higher quality videos but will take more time and cost 2x more billing units. */ pro_mode?: boolean; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the video to generate. Default value: `"720p"` */ resolution?: "480p" | "580p" | "720p"; /** * The seed to use for generating the video.
*/ seed?: number; /** * Strength of video-to-video. Default value: `0.75` */ strength?: number; /** * URL of the video. */ video_url: string | Blob | File; }; /** * Image-to-video request fields; `image_url` and `prompt` are required. */ export type HunyuanVideoV15ImageToVideoInput = { /** * The aspect ratio of the video. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16"; /** * Enable prompt expansion to enhance the input prompt. Default value: `true` */ enable_prompt_expansion?: boolean; /** * URL of the reference image for image-to-video generation. */ image_url: string | Blob | File; /** * The negative prompt to guide what not to generate. Default value: `""` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps. Default value: `28` */ num_inference_steps?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the video. Default value: `"480p"` */ resolution?: string; /** * Random seed for reproducibility. */ seed?: number; }; /** * Text-to-video request fields; only `prompt` is required. */ export type HunyuanVideoV15TextToVideoInput = { /** * The aspect ratio of the video. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16"; /** * Enable prompt expansion to enhance the input prompt. Default value: `true` */ enable_prompt_expansion?: boolean; /** * The negative prompt to guide what not to generate. Default value: `""` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps. Default value: `28` */ num_inference_steps?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the video. Default value: `"480p"` */ resolution?: string; /** * Random seed for reproducibility. */ seed?: number; }; /** * Video-to-video request fields; `prompt` and `video_url` are required. */ export type HunyuanVideoVideoToVideoInput = { /** * The aspect ratio of the video to generate. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16"; /** * If set to true, the safety checker will be enabled.
*/ enable_safety_checker?: boolean; /** * The number of frames to generate. Default value: `"129"` */ num_frames?: "129" | "85"; /** * By default, generations are done with 35 steps. Pro mode does 55 steps which results in higher quality videos but will take more time and cost 2x more billing units. */ pro_mode?: boolean; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the video to generate. Default value: `"720p"` */ resolution?: "480p" | "580p" | "720p"; /** * The seed to use for generating the video. */ seed?: number; /** * Strength for Video-to-Video. Default value: `0.85` */ strength?: number; /** * URL of the video input. */ video_url: string | Blob | File; }; /** * Image-to-world fields; `classes`, `image_url`, `labels_fg1` and `labels_fg2` are required. */ export type HunyuanWorldImageToWorldInput = { /** * Classes to use for the world generation. */ classes: string; /** * Whether to export DRC (Dynamic Resource Configuration). */ export_drc?: boolean; /** * The URL of the image to convert to a world. */ image_url: string | Blob | File; /** * Labels for the first foreground object. */ labels_fg1: string; /** * Labels for the second foreground object. */ labels_fg2: string; }; /** * Result carrying the generated world file. */ export type HunyuanWorldImageToWorldOutput = { /** * The generated world. */ world_file: File; }; /** * Fields for converting an image to a panorama; both are required. */ export type HunyuanWorldInput = { /** * The URL of the image to convert to a panorama. */ image_url: string | Blob | File; /** * The prompt to use for the panorama generation. */ prompt: string; }; /** * Image editing fields; `image_urls` and `prompt` are required. */ export type HyWuEditInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Enable thinking mode. The model reasons about the edit before generating, producing higher quality results at the cost of longer inference time. Disable for faster results on straightforward edits. Default value: `true` */ enable_thinking?: boolean; /** * The desired size of the generated image. If auto, image size will be determined by the model.
Default value: `auto` */ image_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URLs of input images for editing. Typically 2 images: the base image and the reference image. Supports up to 3 images. */ image_urls: Array<string | Blob | File>; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * Number of diffusion denoising steps. Default value: `30` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The text prompt describing the desired edit. Supports both English and Chinese. Use specific instructions like 'Replace the clothing on figure 1 with the outfit from figure 2'. */ prompt: string; /** * Random seed for reproducible results. If None, a random seed is used. */ seed?: number; /** * If True, the media will be returned as a data URI. */ sync_mode?: boolean; }; /** * Editing result: generated images, the seed used, and optional timings. */ export type HyWuEditOutput = { /** * A list of the generated/edited images. NOTE(review): the image element type is not visible in this chunk; typed as `unknown` until confirmed. */ images: Array<unknown>; /** * The seed used for generation. */ seed: number; /** * Performance timing breakdown. */ timings?: unknown; }; /** * Result carrying the generated video file. */ export type I2VOutput = { /** * The generated video */ video: File; }; /** * Relighting request fields; `image_url` and `prompt` are required. */ export type IclightV2Input = { /** * Threshold for the background removal algorithm. A high threshold will produce sharper masks. Note: This parameter is currently deprecated and has no effect on the output. Default value: `0.67` */ background_threshold?: number; /** * The real classifier-free-guidance scale for the generation. Default value: `1` */ cfg?: number; /** * Use HR fix */ enable_hr_fix?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you.
Default value: `5` */ guidance_scale?: number; /** * Strength for high-resolution pass. Only used if enable_hr_fix is True. Default value: `0.95` */ highres_denoise?: number; /** * NOTE(review): no description is provided for this field; presumably the downscale factor used with the HR fix (see `enable_hr_fix`). Confirm. Default value: `0.5` */ hr_downscale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to be used for relighting */ image_url: string | Blob | File; /** * Provide lighting conditions for the model. Default value: `"None"` */ initial_latent?: "None" | "Left" | "Right" | "Top" | "Bottom"; /** * Strength for low-resolution pass. Default value: `0.98` */ lowres_denoise?: number; /** * URL of mask to be used for ic-light conditioning image */ mask_image_url?: string | Blob | File; /** * Negative Prompt for the image. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; /** * Result listing detected faces together with the lip-sync session id. */ export type IdentifyFaceOutput = { /** * List of detected faces in the video with their time ranges.
NOTE(review): the face entry type is not visible in this chunk; typed as `unknown` until confirmed. */ face_data: Array<unknown>; /** * The session id of the lip-sync task */ session_id: string; }; /** * Inpainting fields with character references; `image_url`, `mask_url`, `prompt` and `reference_image_urls` are required. */ export type IdeogramCharacterEditInput = { /** * A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members) */ color_palette?: ColorPalette; /** * Determine if MagicPrompt should be used in generating the request or not. Default value: `true` */ expand_prompt?: boolean; /** * The image URL to generate an image from. MUST have the exact same dimensions (width and height) as the mask image. */ image_url: string | Blob | File; /** * A set of images to use as style references (maximum total size 10MB across all style references). The images should be in JPEG, PNG or WebP format */ image_urls?: Array<string | Blob | File>; /** * The mask URL to inpaint the image. MUST have the exact same dimensions (width and height) as the input image. */ mask_url: string | Blob | File; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The prompt to fill the masked part of the image. */ prompt: string; /** * A set of images to use as character references. Currently only 1 image is supported, rest will be ignored. (maximum total size 10MB across all character references). The images should be in JPEG, PNG or WebP format */ reference_image_urls: Array<string | Blob | File>; /** * A set of masks to apply to the character references. Currently only 1 mask is supported, rest will be ignored. (maximum total size 10MB across all character references). The masks should be in JPEG, PNG or WebP format */ reference_mask_urls?: Array<string | Blob | File>; /** * The rendering speed to use. Default value: `"BALANCED"` */ rendering_speed?: "TURBO" | "BALANCED" | "QUALITY"; /** * Seed for the random number generator */ seed?: number; /** * A list of 8 character hexadecimal codes representing the style of the image.
Cannot be used in conjunction with style_reference_images or style */ style_codes?: Array<string>; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Image generation fields with character references; `prompt` and `reference_image_urls` are required. */ export type IdeogramCharacterInput = { /** * A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members) */ color_palette?: ColorPalette; /** * Determine if MagicPrompt should be used in generating the request or not. Default value: `true` */ expand_prompt?: boolean; /** * The resolution of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * A set of images to use as style references (maximum total size 10MB across all style references). The images should be in JPEG, PNG or WebP format */ image_urls?: Array<string | Blob | File>; /** * Description of what to exclude from an image. Descriptions in the prompt take precedence to descriptions in the negative prompt. Default value: `""` */ negative_prompt?: string; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The prompt to generate the image from. */ prompt: string; /** * A set of images to use as character references. Currently only 1 image is supported, rest will be ignored. (maximum total size 10MB across all character references). The images should be in JPEG, PNG or WebP format */ reference_image_urls: Array<string | Blob | File>; /** * A set of masks to apply to the character references. Currently only 1 mask is supported, rest will be ignored. (maximum total size 10MB across all character references). The masks should be in JPEG, PNG or WebP format */ reference_mask_urls?: Array<string | Blob | File>; /** * The rendering speed to use.
Default value: `"BALANCED"` */ rendering_speed?: "TURBO" | "BALANCED" | "QUALITY"; /** * Seed for the random number generator */ seed?: number; /** * The style type to generate with. Cannot be used with style_codes. Default value: `"AUTO"` */ style?: "AUTO" | "REALISTIC" | "FICTION"; /** * A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style */ style_codes?: Array<string>; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Remix fields with character references; `image_url`, `prompt` and `reference_image_urls` are required. */ export type IdeogramCharacterRemixInput = { /** * A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members) */ color_palette?: ColorPalette; /** * Determine if MagicPrompt should be used in generating the request or not. Default value: `true` */ expand_prompt?: boolean; /** * The resolution of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The image URL to remix */ image_url: string | Blob | File; /** * A set of images to use as style references (maximum total size 10MB across all style references). The images should be in JPEG, PNG or WebP format */ image_urls?: Array<string | Blob | File>; /** * Description of what to exclude from an image. Descriptions in the prompt take precedence to descriptions in the negative prompt. Default value: `""` */ negative_prompt?: string; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The prompt to remix the image with */ prompt: string; /** * A set of images to use as character references. Currently only 1 image is supported, rest will be ignored. (maximum total size 10MB across all character references).
The images should be in JPEG, PNG or WebP format */ reference_image_urls: Array<string | Blob | File>; /** * A set of masks to apply to the character references. Currently only 1 mask is supported, rest will be ignored. (maximum total size 10MB across all character references). The masks should be in JPEG, PNG or WebP format */ reference_mask_urls?: Array<string | Blob | File>; /** * The rendering speed to use. Default value: `"BALANCED"` */ rendering_speed?: "TURBO" | "BALANCED" | "QUALITY"; /** * Seed for the random number generator */ seed?: number; /** * Strength of the input image in the remix. Default value: `0.8` */ strength?: number; /** * The style type to generate with. Cannot be used with style_codes. Default value: `"AUTO"` */ style?: "AUTO" | "REALISTIC" | "FICTION"; /** * A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style */ style_codes?: Array<string>; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Image generation fields; only `prompt` is required. */ export type IdeogramV2aInput = { /** * The aspect ratio of the generated image. Default value: `"1:1"` */ aspect_ratio?: "10:16" | "16:10" | "9:16" | "16:9" | "4:3" | "3:4" | "1:1" | "1:3" | "3:1" | "3:2" | "2:3"; /** * Whether to expand the prompt with MagicPrompt functionality. Default value: `true` */ expand_prompt?: boolean; /** * The prompt to generate the image from. */ prompt: string; /** * Seed for the random number generator */ seed?: number; /** * The style of the generated image. Default value: `"auto"` */ style?: "auto" | "general" | "realistic" | "design" | "render_3D" | "anime"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Inpainting fields; `image_url`, `mask_url` and `prompt` are required. */ export type IdeogramV2EditInput = { /** * Whether to expand the prompt with MagicPrompt functionality. Default value: `true` */ expand_prompt?: boolean; /** * The image URL to generate an image from.
Needs to match the dimensions of the mask. */ image_url: string | Blob | File; /** * The mask URL to inpaint the image. Needs to match the dimensions of the input image. */ mask_url: string | Blob | File; /** * The prompt to fill the masked part of the image. */ prompt: string; /** * Seed for the random number generator */ seed?: number; /** * The style of the generated image. Default value: `"auto"` */ style?: "auto" | "general" | "realistic" | "design" | "render_3D" | "anime"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Image generation fields; only `prompt` is required. */ export type IdeogramV2Input = { /** * The aspect ratio of the generated image. Default value: `"1:1"` */ aspect_ratio?: "10:16" | "16:10" | "9:16" | "16:9" | "4:3" | "3:4" | "1:1" | "1:3" | "3:1" | "3:2" | "2:3"; /** * Whether to expand the prompt with MagicPrompt functionality. Default value: `true` */ expand_prompt?: boolean; /** * A negative prompt to avoid in the generated image. Default value: `""` */ negative_prompt?: string; /** * The prompt to generate the image from. */ prompt: string; /** * Seed for the random number generator */ seed?: number; /** * The style of the generated image. Default value: `"auto"` */ style?: "auto" | "general" | "realistic" | "design" | "render_3D" | "anime"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Inpainting fields; `image_url`, `mask_url` and `prompt` are required. */ export type IdeogramV3EditInput = { /** * A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members) */ color_palette?: ColorPalette; /** * Determine if MagicPrompt should be used in generating the request or not. Default value: `true` */ expand_prompt?: boolean; /** * The image URL to generate an image from. MUST have the exact same dimensions (width and height) as the mask image.
*/ image_url: string | Blob | File; /** * A set of images to use as style references (maximum total size 10MB across all style references). The images should be in JPEG, PNG or WebP format */ image_urls?: Array<string | Blob | File>; /** * The mask URL to inpaint the image. MUST have the exact same dimensions (width and height) as the input image. */ mask_url: string | Blob | File; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The prompt to fill the masked part of the image. */ prompt: string; /** * The rendering speed to use. Default value: `"BALANCED"` */ rendering_speed?: "TURBO" | "BALANCED" | "QUALITY"; /** * Seed for the random number generator */ seed?: number; /** * A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style */ style_codes?: Array<string>; /** * Style preset for generation. The chosen style preset will guide the generation. */ style_preset?: "80S_ILLUSTRATION" | "90S_NOSTALGIA" | "ABSTRACT_ORGANIC" | "ANALOG_NOSTALGIA" | "ART_BRUT" | "ART_DECO" | "ART_POSTER" | "AURA" | "AVANT_GARDE" | "BAUHAUS" | "BLUEPRINT" | "BLURRY_MOTION" | "BRIGHT_ART" | "C4D_CARTOON" | "CHILDRENS_BOOK" | "COLLAGE" | "COLORING_BOOK_I" | "COLORING_BOOK_II" | "CUBISM" | "DARK_AURA" | "DOODLE" | "DOUBLE_EXPOSURE" | "DRAMATIC_CINEMA" | "EDITORIAL" | "EMOTIONAL_MINIMAL" | "ETHEREAL_PARTY" | "EXPIRED_FILM" | "FLAT_ART" | "FLAT_VECTOR" | "FOREST_REVERIE" | "GEO_MINIMALIST" | "GLASS_PRISM" | "GOLDEN_HOUR" | "GRAFFITI_I" | "GRAFFITI_II" | "HALFTONE_PRINT" | "HIGH_CONTRAST" | "HIPPIE_ERA" | "ICONIC" | "JAPANDI_FUSION" | "JAZZY" | "LONG_EXPOSURE" | "MAGAZINE_EDITORIAL" | "MINIMAL_ILLUSTRATION" | "MIXED_MEDIA" | "MONOCHROME" | "NIGHTLIFE" | "OIL_PAINTING" | "OLD_CARTOONS" | "PAINT_GESTURE" | "POP_ART" | "RETRO_ETCHING" | "RIVIERA_POP" | "SPOTLIGHT_80S" | "STYLIZED_RED" | "SURREAL_COLLAGE" | "TRAVEL_POSTER" | "VINTAGE_GEO" | "VINTAGE_POSTER" | "WATERCOLOR" | "WEIRD" | "WOODBLOCK_PRINT";
/** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Image generation fields; only `prompt` is required. */ export type IdeogramV3Input = { /** * A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members) */ color_palette?: ColorPalette; /** * Determine if MagicPrompt should be used in generating the request or not. Default value: `true` */ expand_prompt?: boolean; /** * The resolution of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * A set of images to use as style references (maximum total size 10MB across all style references). The images should be in JPEG, PNG or WebP format */ image_urls?: Array<string | Blob | File>; /** * Description of what to exclude from an image. Descriptions in the prompt take precedence to descriptions in the negative prompt. Default value: `""` */ negative_prompt?: string; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The prompt to generate the image from. */ prompt: string; /** * The rendering speed to use. Default value: `"BALANCED"` */ rendering_speed?: "TURBO" | "BALANCED" | "QUALITY"; /** * Seed for the random number generator */ seed?: number; /** * The style type to generate with. Cannot be used with style_codes. */ style?: "AUTO" | "GENERAL" | "REALISTIC" | "DESIGN"; /** * A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style */ style_codes?: Array<string>; /** * Style preset for generation. The chosen style preset will guide the generation.
*/ style_preset?: "80S_ILLUSTRATION" | "90S_NOSTALGIA" | "ABSTRACT_ORGANIC" | "ANALOG_NOSTALGIA" | "ART_BRUT" | "ART_DECO" | "ART_POSTER" | "AURA" | "AVANT_GARDE" | "BAUHAUS" | "BLUEPRINT" | "BLURRY_MOTION" | "BRIGHT_ART" | "C4D_CARTOON" | "CHILDRENS_BOOK" | "COLLAGE" | "COLORING_BOOK_I" | "COLORING_BOOK_II" | "CUBISM" | "DARK_AURA" | "DOODLE" | "DOUBLE_EXPOSURE" | "DRAMATIC_CINEMA" | "EDITORIAL" | "EMOTIONAL_MINIMAL" | "ETHEREAL_PARTY" | "EXPIRED_FILM" | "FLAT_ART" | "FLAT_VECTOR" | "FOREST_REVERIE" | "GEO_MINIMALIST" | "GLASS_PRISM" | "GOLDEN_HOUR" | "GRAFFITI_I" | "GRAFFITI_II" | "HALFTONE_PRINT" | "HIGH_CONTRAST" | "HIPPIE_ERA" | "ICONIC" | "JAPANDI_FUSION" | "JAZZY" | "LONG_EXPOSURE" | "MAGAZINE_EDITORIAL" | "MINIMAL_ILLUSTRATION" | "MIXED_MEDIA" | "MONOCHROME" | "NIGHTLIFE" | "OIL_PAINTING" | "OLD_CARTOONS" | "PAINT_GESTURE" | "POP_ART" | "RETRO_ETCHING" | "RIVIERA_POP" | "SPOTLIGHT_80S" | "STYLIZED_RED" | "SURREAL_COLLAGE" | "TRAVEL_POSTER" | "VINTAGE_GEO" | "VINTAGE_POSTER" | "WATERCOLOR" | "WEIRD" | "WOODBLOCK_PRINT"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Reframing fields; `image_size` and `image_url` are required. */ export type IdeogramV3ReframeInput = { /** * A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members) */ color_palette?: ColorPalette; /** * The resolution for the reframed output image */ image_size: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The image URL to reframe */ image_url: string | Blob | File; /** * A set of images to use as style references (maximum total size 10MB across all style references). The images should be in JPEG, PNG or WebP format */ image_urls?: Array<string | Blob | File>; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The rendering speed to use.
Default value: `"BALANCED"` */ rendering_speed?: "TURBO" | "BALANCED" | "QUALITY"; /** * Seed for the random number generator */ seed?: number; /** * The style type to generate with. Cannot be used with style_codes. */ style?: "AUTO" | "GENERAL" | "REALISTIC" | "DESIGN"; /** * A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style */ style_codes?: Array<string>; /** * Style preset for generation. The chosen style preset will guide the generation. */ style_preset?: "80S_ILLUSTRATION" | "90S_NOSTALGIA" | "ABSTRACT_ORGANIC" | "ANALOG_NOSTALGIA" | "ART_BRUT" | "ART_DECO" | "ART_POSTER" | "AURA" | "AVANT_GARDE" | "BAUHAUS" | "BLUEPRINT" | "BLURRY_MOTION" | "BRIGHT_ART" | "C4D_CARTOON" | "CHILDRENS_BOOK" | "COLLAGE" | "COLORING_BOOK_I" | "COLORING_BOOK_II" | "CUBISM" | "DARK_AURA" | "DOODLE" | "DOUBLE_EXPOSURE" | "DRAMATIC_CINEMA" | "EDITORIAL" | "EMOTIONAL_MINIMAL" | "ETHEREAL_PARTY" | "EXPIRED_FILM" | "FLAT_ART" | "FLAT_VECTOR" | "FOREST_REVERIE" | "GEO_MINIMALIST" | "GLASS_PRISM" | "GOLDEN_HOUR" | "GRAFFITI_I" | "GRAFFITI_II" | "HALFTONE_PRINT" | "HIGH_CONTRAST" | "HIPPIE_ERA" | "ICONIC" | "JAPANDI_FUSION" | "JAZZY" | "LONG_EXPOSURE" | "MAGAZINE_EDITORIAL" | "MINIMAL_ILLUSTRATION" | "MIXED_MEDIA" | "MONOCHROME" | "NIGHTLIFE" | "OIL_PAINTING" | "OLD_CARTOONS" | "PAINT_GESTURE" | "POP_ART" | "RETRO_ETCHING" | "RIVIERA_POP" | "SPOTLIGHT_80S" | "STYLIZED_RED" | "SURREAL_COLLAGE" | "TRAVEL_POSTER" | "VINTAGE_GEO" | "VINTAGE_POSTER" | "WATERCOLOR" | "WEIRD" | "WOODBLOCK_PRINT"; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
};

/**
 * Input for remixing an image with a text prompt.
 * `image_url` and `prompt` are the only required fields.
 */
export type IdeogramV3RemixInput = {
  /**
   * A color palette for generation, must EITHER be specified via one of the presets (name)
   * or explicitly via hexadecimal representations of the color with optional weights (members).
   */
  color_palette?: ColorPalette;
  /** Determine if MagicPrompt should be used in generating the request or not. Default value: `true` */
  expand_prompt?: boolean;
  /** The resolution of the generated image. Default value: `square_hd` */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /** The image URL to remix. */
  image_url: string | Blob | File;
  /**
   * A set of images to use as style references (maximum total size 10MB across all
   * style references). The images should be in JPEG, PNG or WebP format.
   * NOTE(review): the element type was missing (bare `Array` does not compile);
   * assumed URL strings — confirm against the API schema.
   */
  image_urls?: Array<string>;
  /**
   * Description of what to exclude from an image. Descriptions in the prompt take
   * precedence to descriptions in the negative prompt. Default value: `""`
   */
  negative_prompt?: string;
  /** Number of images to generate. Default value: `1` */
  num_images?: number;
  /** The prompt to remix the image with. */
  prompt: string;
  /** The rendering speed to use. Default value: `"BALANCED"` */
  rendering_speed?: "TURBO" | "BALANCED" | "QUALITY";
  /** Seed for the random number generator. */
  seed?: number;
  /** Strength of the input image in the remix. Default value: `0.8` */
  strength?: number;
  /** The style type to generate with. Cannot be used with style_codes. */
  style?: "AUTO" | "GENERAL" | "REALISTIC" | "DESIGN";
  /**
   * A list of 8 character hexadecimal codes representing the style of the image.
   * Cannot be used in conjunction with style_reference_images or style.
   */
  style_codes?: Array<string>;
  /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
};

/**
 * Input for replacing the background of an image according to a text prompt.
 * `image_url` and `prompt` are the only required fields declared here.
 */
export type IdeogramV3ReplaceBackgroundInput = {
  /**
   * A color palette for generation, must EITHER be specified via one of the presets (name)
   * or explicitly via hexadecimal representations of the color with optional weights (members).
   */
  color_palette?: ColorPalette;
  /** Determine if MagicPrompt should be used in generating the request or not. Default value: `true` */
  expand_prompt?: boolean;
  /** The image URL whose background needs to be replaced. */
  image_url: string | Blob | File;
  /**
   * A set of images to use as style references (maximum total size 10MB across all
   * style references). The images should be in JPEG, PNG or WebP format.
   * NOTE(review): the element type was missing (bare `Array` does not compile);
   * assumed URL strings — confirm against the API schema.
   */
  image_urls?: Array<string>;
  /** Number of images to generate. Default value: `1` */
  num_images?: number;
  /**
   * Text prompt. The upstream description is only an example value:
   * "Cyber punk city with neon lights and skyscrapers".
   * NOTE(review): presumably describes the replacement background — confirm.
   */
  prompt: string;
  /** The rendering speed to use. Default value: `"BALANCED"` */
  rendering_speed?: "TURBO" | "BALANCED" | "QUALITY";
  /** Seed for the random number generator. */
  seed?: number;
  /** The style type to generate with. Cannot be used with style_codes. */
  style?: "AUTO" | "GENERAL" | "REALISTIC" | "DESIGN";
  /**
   * A list of 8 character hexadecimal codes representing the style of the image.
   * Cannot be used in conjunction with style_reference_images or style.
   */
  style_codes?: Array<string>;
  /** * Style preset for generation. The chosen style preset will guide the generation.
*/
  style_preset?:
    | "80S_ILLUSTRATION" | "90S_NOSTALGIA" | "ABSTRACT_ORGANIC" | "ANALOG_NOSTALGIA" | "ART_BRUT"
    | "ART_DECO" | "ART_POSTER" | "AURA" | "AVANT_GARDE" | "BAUHAUS"
    | "BLUEPRINT" | "BLURRY_MOTION" | "BRIGHT_ART" | "C4D_CARTOON" | "CHILDRENS_BOOK"
    | "COLLAGE" | "COLORING_BOOK_I" | "COLORING_BOOK_II" | "CUBISM" | "DARK_AURA"
    | "DOODLE" | "DOUBLE_EXPOSURE" | "DRAMATIC_CINEMA" | "EDITORIAL" | "EMOTIONAL_MINIMAL"
    | "ETHEREAL_PARTY" | "EXPIRED_FILM" | "FLAT_ART" | "FLAT_VECTOR" | "FOREST_REVERIE"
    | "GEO_MINIMALIST" | "GLASS_PRISM" | "GOLDEN_HOUR" | "GRAFFITI_I" | "GRAFFITI_II"
    | "HALFTONE_PRINT" | "HIGH_CONTRAST" | "HIPPIE_ERA" | "ICONIC" | "JAPANDI_FUSION"
    | "JAZZY" | "LONG_EXPOSURE" | "MAGAZINE_EDITORIAL" | "MINIMAL_ILLUSTRATION" | "MIXED_MEDIA"
    | "MONOCHROME" | "NIGHTLIFE" | "OIL_PAINTING" | "OLD_CARTOONS" | "PAINT_GESTURE"
    | "POP_ART" | "RETRO_ETCHING" | "RIVIERA_POP" | "SPOTLIGHT_80S" | "STYLIZED_RED"
    | "SURREAL_COLLAGE" | "TRAVEL_POSTER" | "VINTAGE_GEO" | "VINTAGE_POSTER" | "WATERCOLOR"
    | "WEIRD" | "WOODBLOCK_PRINT";
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
};

/**
 * Input for generating an image from a prompt and an input image, with
 * ControlNet conditioning controls.
 */
export type IllusionDiffusionInput = {
  /** Default value: `1` */
  control_guidance_end?: number;
  /** NOTE(review): no upstream description or default is given for this field. */
  control_guidance_start?: number;
  /** The scale of the ControlNet. Default value: `1` */
  controlnet_conditioning_scale?: number;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you.
   * Default value: `7.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. You can choose between some presets or
   * custom height and width that **must be multiples of 8**.
   * Default value: `square_hd`
   */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /** * Input image url.
*/
  image_url: string | Blob | File;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Increasing the amount of steps tells Stable Diffusion that it should take more steps
   * to generate your final result which can increase the amount of detail in your image.
   * Default value: `40`
   */
  num_inference_steps?: number;
  /** The prompt to use for generating the image. Be as descriptive as possible for best results. */
  prompt: string;
  /** Scheduler / sampler to use for the image denoising process. Default value: `"Euler"` */
  scheduler?: "DPM++ Karras SDE" | "Euler";
  /**
   * Seed of the generated Image. It will be the same value of the one passed in the
   * input or the randomly generated that was used in case none was passed.
   */
  seed?: number;
};

/**
 * Image file descriptor: a required download URL plus optional metadata
 * (mime type, name, size, pixel dimensions) and optional file data.
 */
export type Image = {
  /** The mime type of the file. */
  content_type?: string;
  /** File data */
  file_data?: string;
  /** The name of the file. It will be auto-generated if not provided. */
  file_name?: string;
  /** The size of the file in bytes. */
  file_size?: number;
  /** The height of the image in pixels. */
  height?: number;
  /** The URL where the file can be downloaded from. */
  url: string;
  /** The width of the image in pixels. */
  width?: number;
};

/**
 * Options for processing an image into pixel art.
 */
export type image2pixelInput = {
  /** Alpha binarization threshold (0-255). Default value: `128` */
  alpha_threshold?: number;
  /** Enable automatic detection of optimal number of colors. */
  auto_color_detect?: boolean;
  /** Controls where to flood-fill from when removing the background. Default value: `"corners"` */
  background_mode?: "edges" | "corners" | "midpoints";
  /** Background tolerance (0-255). */
  background_tolerance?: number;
  /** * Remove isolated diagonal pixels (jaggy edge cleanup).
*/
  cleanup_jaggy?: boolean;
  /** Apply morphological operations to remove noise. */
  cleanup_morph?: boolean;
  /** Scale detection method to use. Default value: `"auto"` */
  detect_method?: "auto" | "runs" | "edge";
  /** Dominant color threshold (0.0-1.0). Default value: `0.05` */
  dominant_color_threshold?: number;
  /** Downscaling method to produce the pixel-art output. Default value: `"dominant"` */
  downscale_method?: "dominant" | "median" | "mode" | "mean" | "content-adaptive";
  /** Optional fixed color palette as hex strings (e.g., ['#000000', '#ffffff']). */
  fixed_palette?: Array<string>;
  /** The image URL to process into improved pixel art. */
  image_url: string | Blob | File;
  /** Maximum number of colors in the output palette. Set None to disable limit. Default value: `32` */
  max_colors?: number;
  /** Force a specific pixel scale. If None, auto-detect. */
  scale?: number;
  /** Align output to the pixel grid. Default value: `true` */
  snap_grid?: boolean;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
  /**
   * Remove background of the image. This will check for contiguous color regions
   * from the edges after correction and make them transparent.
   */
  transparent_background?: boolean;
  /** Trim borders of the image. */
  trim_borders?: boolean;
};

/**
 * Result of the pixel-art conversion: the output images plus the palette,
 * color count and detected pixel scale.
 */
export type image2pixelOutput = {
  /**
   * The processed pixel-art image (PNG) and the scaled image (PNG).
   * NOTE(review): the element type was missing (bare `Array` does not compile);
   * `ImageFile` is assumed — confirm against the API schema.
   */
  images: Array<ImageFile>;
  /** The number of colors in the processed media. */
  num_colors: number;
  /**
   * The palette of the processed media.
   * NOTE(review): the element type was missing; presumably hex color strings
   * like `fixed_palette` on the input — confirm.
   */
  palette: Array<string>;
  /** * The detected pixel scale of the input.
*/
  pixel_scale: number;
};

/**
 * Options for converting an image to SVG.
 */
export type image2svgInput = {
  /** Color quantization level. Default value: `6` */
  color_precision?: number;
  /** Choose between color or binary (black and white) output. Default value: `"color"` */
  colormode?: "color" | "binary";
  /** Corner detection threshold in degrees. Default value: `60` */
  corner_threshold?: number;
  /** Filter out small speckles and noise. Default value: `4` */
  filter_speckle?: number;
  /** Hierarchical mode: stacked or cutout. Default value: `"stacked"` */
  hierarchical?: "stacked" | "cutout";
  /** The image to convert to SVG. */
  image_url: string | Blob | File;
  /** Layer difference threshold for hierarchical mode. Default value: `16` */
  layer_difference?: number;
  /** Length threshold for curves/lines. Default value: `4` */
  length_threshold?: number;
  /** Maximum number of iterations for optimization. Default value: `10` */
  max_iterations?: number;
  /** Mode: spline (curved) or polygon (straight lines). Default value: `"spline"` */
  mode?: "spline" | "polygon";
  /** Decimal precision for path coordinates. Default value: `3` */
  path_precision?: number;
  /** Splice threshold for joining paths. Default value: `45` */
  splice_threshold?: number;
};

/**
 * Input for changing the facial expression in a portrait image.
 */
export type ImageAppsV2ExpressionChangeInput = {
  /** Aspect ratio for 4K output (default: 3:4 for portraits). */
  aspect_ratio?: AspectRatio;
  /** Portrait image URL for expression change. */
  image_url: string | Blob | File;
  /** Default value: `"smile"` */
  target_expression?:
    | "smile" | "surprise" | "glare" | "panic" | "shyness"
    | "laugh" | "cry" | "angry" | "sad" | "happy"
    | "excited" | "shocked" | "confused" | "focused" | "dreamy"
    | "serious" | "playful" | "mysterious" | "confident" | "thoughtful";
};

/**
 * Input for changing the hair color and hairstyle in a portrait image.
 */
export type ImageAppsV2HairChangeInput = {
  /** Aspect ratio for 4K output (default: 3:4 for portraits). */
  aspect_ratio?: AspectRatio;
  /** Default value: `"natural"` */
  hair_color?: "black" | "dark_brown" | "light_brown" | "blonde" | "platinum_blonde" | "red" | "auburn"
| "gray" | "silver" | "blue" | "green" | "purple" | "pink" | "rainbow" | "natural" | "highlights" | "ombre" | "balayage";
  /** Portrait image URL for hair change. */
  image_url: string | Blob | File;
  /** Default value: `"long_hair"` */
  target_hairstyle?:
    | "short_hair" | "medium_long_hair" | "long_hair" | "curly_hair" | "wavy_hair"
    | "high_ponytail" | "bun" | "bob_cut" | "pixie_cut" | "braids"
    | "straight_hair" | "afro" | "dreadlocks" | "buzz_cut" | "mohawk"
    | "bangs" | "side_part" | "middle_part";
};

/**
 * Input for removing a named object from an image.
 */
export type ImageAppsV2ObjectRemovalInput = {
  /** Aspect ratio for 4K output. */
  aspect_ratio?: AspectRatio;
  /** Image URL containing object to remove. */
  image_url: string | Blob | File;
  /** Object to remove. */
  object_to_remove: string;
};

/**
 * Input for restoring an old or damaged photo.
 */
export type ImageAppsV2PhotoRestorationInput = {
  /** Aspect ratio for 4K output (default: 4:3 for classic photos). */
  aspect_ratio?: AspectRatio;
  /** Default value: `true` */
  enhance_resolution?: boolean;
  /** Default value: `true` */
  fix_colors?: boolean;
  /** Old or damaged photo URL to restore. */
  image_url: string | Blob | File;
  /** Default value: `true` */
  remove_scratches?: boolean;
};

/**
 * Input for applying a preset or reference-image style to an image.
 */
export type ImageAppsV2StyleTransferInput = {
  /** Aspect ratio for 4K output. */
  aspect_ratio?: AspectRatio;
  /** Image URL for style transfer. */
  image_url: string | Blob | File;
  /** * Optional reference image URL. When provided, the style will be inferred from this image instead of the selected preset style.
*/
  style_reference_image_url?: string | Blob | File;
  /** Default value: `"impressionist"` */
  target_style?:
    | "anime_character" | "cartoon_3d" | "hand_drawn_animation" | "cyberpunk_future" | "anime_game_style"
    | "comic_book_animation" | "animated_series" | "cartoon_animation" | "lofi_aesthetic" | "cottagecore"
    | "dark_academia" | "y2k" | "vaporwave" | "liminal_space" | "weirdcore"
    | "dreamcore" | "synthwave" | "outrun" | "photorealistic" | "hyperrealistic"
    | "digital_art" | "concept_art" | "impressionist" | "anime" | "pixel_art"
    | "claymation";
};

/**
 * Output carrying mask images and a generated text output.
 */
export type ImageChatOutput = {
  /**
   * Dictionary of label: mask image.
   * NOTE(review): the upstream text says "dictionary" but the field is declared
   * as an array whose element type was missing (bare `Array` does not compile);
   * `Image` is assumed for the elements — confirm against the API schema.
   */
  masks: Array<Image>;
  /** Generated output. */
  output: string;
};

/**
 * An image used as a condition, with the frame it starts on and its strength.
 */
export type ImageCondition = {
  /** The URL of the image to use as input. */
  image_url: string | Blob | File;
  /** The frame number to start the condition on. */
  start_frame_number?: number;
  /** The strength of the condition. Default value: `1` */
  strength?: number;
};

/**
 * An image used as conditioning, with its starting frame and strength.
 */
export type ImageConditioningInput = {
  /** URL of image to use as conditioning. */
  image_url: string | Blob | File;
  /** Frame number of the image from which the conditioning starts. Must be a multiple of 8. */
  start_frame_num?: number;
  /** Strength of the conditioning. 0.0 means no conditioning, 1.0 means full conditioning. Default value: `1` */
  strength?: number;
};

/**
 * Input for prompt-driven image editing based on one to three reference images.
 */
export type ImageEditInput = {
  /**
   * Enable LLM prompt optimization. Significantly improves results for simple
   * prompts but adds 3-4 seconds processing time. Default value: `true`
   */
  enable_prompt_expansion?: boolean;
  /** Enable content moderation for input and output. Default value: `true` */
  enable_safety_checker?: boolean;
  /** * Output image size. Use presets like 'square_hd', 'landscape_16_9', 'portrait_9_16', or specify exact dimensions with ImageSize(width=1280, height=720). Total pixels must be between 768*768 and 1280*1280.
Default value: `square_hd` */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /**
   * Reference images for editing (1-3 images required). Order matters: reference as
   * 'image 1', 'image 2', 'image 3' in prompt. Resolution: 384-5000px each dimension.
   * Max size: 10MB each. Formats: JPEG, JPG, PNG (no alpha), BMP, WEBP.
   * NOTE(review): the element type was missing (bare `Array` does not compile);
   * assumed URL strings — confirm against the API schema.
   */
  image_urls: Array<string>;
  /** Content to avoid in the generated image. Max 500 characters. Default value: `""` */
  negative_prompt?: string;
  /** Number of images to generate (1-4). Directly affects billing cost. Default value: `1` */
  num_images?: number;
  /**
   * Text prompt describing the desired image. Supports Chinese and English.
   * Max 2000 characters. Example: 'Generate an image using the style of image 1
   * and background of image 2'.
   */
  prompt: string;
  /** Random seed for reproducibility (0-2147483647). Same seed produces more consistent results. */
  seed?: number;
};

/**
 * Image file descriptor: a required download URL plus optional metadata
 * (mime type, name, size, dimensions).
 */
export type ImageFile = {
  /** The mime type of the file. */
  content_type?: string;
  /** The name of the file. It will be auto-generated if not provided. */
  file_name?: string;
  /** The size of the file in bytes. */
  file_size?: number;
  /** The height of the image. */
  height?: number;
  /** The URL where the file can be downloaded from. */
  url: string;
  /** The width of the image. */
  width?: number;
};

/**
 * Options for filling a masked area using one or more fill images.
 */
export type ImageFillInput = {
  /**
   * URLs of images to be filled into the masked area.
   * NOTE(review): the array element type was missing (bare `Array` does not
   * compile); assumed URL strings — confirm against the API schema.
   */
  fill_image_url?: Array<string> | string | Blob | File;
  /**
   * Uses the provided fill image in context with the base image to fill in more
   * faithfully. Will increase price.
   */
  in_context_fill?: boolean;
  /** * Whether to use the prompt as well in the generation, along with the redux image.
*/
  use_prompt?: boolean;
};

/**
 * Options for arranging a list of images into a grid image.
 */
export type ImageGridInput = {
  /** Background color for empty cells and spacing. Default value: `"white"` */
  background_color?:
    | "white" | "black" | "red" | "green" | "blue"
    | "yellow" | "orange" | "purple" | "pink" | "brown"
    | "gray" | "cyan" | "magenta" | "transparent";
  /** Height of each cell in pixels (if not set, uses first image height). */
  cell_height?: number;
  /** Width of each cell in pixels (if not set, uses first image width). */
  cell_width?: number;
  /** Number of columns in the grid. Default value: `2` */
  columns?: number;
  /** How images fit in cells. Default value: `"cover"` */
  fit_mode?: "cover" | "contain" | "stretch";
  /**
   * List of image URLs to arrange in grid.
   * NOTE(review): the element type was missing (bare `Array` does not compile);
   * assumed URL strings — confirm against the API schema.
   */
  image_urls: Array<string>;
  /** Output format for the grid image. Default value: `"png"` */
  output_format?: "png" | "jpg" | "jpeg" | "webp";
  /** Spacing between cells in pixels. */
  spacing?: number;
};

/**
 * Image file descriptor: a required download URL plus optional metadata
 * (mime type, name, size, pixel dimensions) and optional file data.
 */
export type ImageInput = {
  /** The mime type of the file. */
  content_type?: string;
  /** File data */
  file_data?: string;
  /** The name of the file. It will be auto-generated if not provided. */
  file_name?: string;
  /** The size of the file in bytes. */
  file_size?: number;
  /** The height of the image in pixels. */
  height?: number;
  /** The URL where the file can be downloaded from. */
  url: string;
  /** * The width of the image in pixels.
*/
  width?: number;
};

/**
 * Text-to-image request: a required prompt plus optional aspect ratio,
 * negative prompt, image count and seed.
 */
export type imagen3Input = {
  /** The aspect ratio of the generated image. Default value: `"1:1"` */
  aspect_ratio?: "1:1" | "16:9" | "9:16" | "3:4" | "4:3";
  /** A description of what to discourage in the generated images. Default value: `""` */
  negative_prompt?: string;
  /** Number of images to generate (1-4). Default value: `1` */
  num_images?: number;
  /** The text prompt describing what you want to see. */
  prompt: string;
  /** Random seed for reproducible generation. */
  seed?: number;
};

/**
 * Result of an image generation: the images and the seed that was used.
 */
export type imagen3Output = {
  /**
   * NOTE(review): no upstream description is given, and the element type was
   * missing (bare `Array` does not compile). `ImageFile` is assumed —
   * confirm against the API schema.
   */
  images: Array<ImageFile>;
  /** Seed used for generation. */
  seed: number;
};

/**
 * Text-to-image request: a required prompt plus optional output controls.
 */
export type Imagen4PreviewFastInput = {
  /** The aspect ratio of the generated image. Default value: `"1:1"` */
  aspect_ratio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The format of the generated image. Default value: `"png"` */
  output_format?: "jpeg" | "png" | "webp";
  /** The text prompt to generate an image from. */
  prompt: string;
  /**
   * The safety tolerance level for content moderation. 1 is the most strict
   * (blocks most content), 6 is the least strict. Default value: `"4"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /** The seed for the random number generator. */
  seed?: number;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
};

/**
 * Text-to-image request: a required prompt plus optional output controls,
 * including the output resolution.
 */
export type Imagen4PreviewInput = {
  /** The aspect ratio of the generated image. Default value: `"1:1"` */
  aspect_ratio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The format of the generated image. Default value: `"png"` */
  output_format?: "jpeg" | "png" | "webp";
  /** The text prompt to generate an image from. */
  prompt: string;
  /** * The resolution of the generated image.
Default value: `"1K"` */
  resolution?: "1K" | "2K";
  /**
   * The safety tolerance level for content moderation. 1 is the most strict
   * (blocks most content), 6 is the least strict. Default value: `"4"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /** The seed for the random number generator. */
  seed?: number;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
};

/**
 * Image file descriptor: a required download URL plus optional metadata
 * (mime type, name, size, pixel dimensions).
 */
export type ImageOutput = {
  /** The mime type of the file. */
  content_type?: string;
  /** The name of the file. It will be auto-generated if not provided. */
  file_name?: string;
  /** The size of the file in bytes. */
  file_size?: number;
  /** The height of the image in pixels. */
  height?: number;
  /** The URL where the file can be downloaded from. */
  url: string;
  /** The width of the image in pixels. */
  width?: number;
};

/**
 * An image prompt entry: an optional image with a type, weight and stop point.
 * NOTE(review): upstream gives only default values for these fields, no descriptions.
 */
export type ImagePrompt = {
  /** NOTE(review): no upstream description is given for this field. */
  image_url?: string | Blob | File;
  /** Default value: `0.5` */
  stop_at?: number;
  /** Default value: `"ImagePrompt"` */
  type?: "ImagePrompt" | "PyraCanny" | "CPDS" | "FaceSwap";
  /** Default value: `1` */
  weight?: number;
};

/**
 * Explicit pixel dimensions for a generated image.
 */
export type ImageSize = {
  /** The height of the generated image. Default value: `512` */
  height?: number;
  /** The width of the generated image. Default value: `512` */
  width?: number;
};

/**
 * Options for generating a 3D model from an image.
 */
export type ImageTo3dInput = {
  /**
   * Automatically scale the model to real-world dimensions, with the unit in meters.
   * The default value is False.
   */
  auto_size?: boolean;
  /**
   * Limits the number of faces on the output model. If this option is not set,
   * the face limit will be adaptively determined.
   */
  face_limit?: number;
  /** URL of the image to use for model generation. */
  image_url: string | Blob | File;
  /** * Set orientation=align_image to automatically rotate the model to align the original image. The default value is default.
Default value: `"default"` */
  orientation?: "default" | "align_image";
  /**
   * A boolean option to enable pbr. The default value is True, set False to get a
   * model without pbr. If this option is set to True, texture will be ignored and
   * used as True.
   */
  pbr?: boolean;
  /**
   * Set True to enable quad mesh output (extra $0.05 per generation). If quad=True
   * and face_limit is not set, the default face_limit will be 10000.
   * Note: Enabling this option will force the output to be an FBX model.
   */
  quad?: boolean;
  /**
   * This is the random seed for model generation. The seed controls the geometry
   * generation process, ensuring identical models when the same seed is used.
   * This parameter is an integer and is randomly chosen if not set.
   */
  seed?: number;
  /**
   * An option to enable texturing. Default is 'standard', set 'no' to get a model
   * without any textures, and set 'HD' to get a model with hd quality textures.
   * Default value: `"standard"`
   */
  texture?: "no" | "standard" | "HD";
  /**
   * Determines the prioritization of texture alignment in the 3D model.
   * The default value is original_image. Default value: `"original_image"`
   */
  texture_alignment?: "original_image" | "geometry";
  /**
   * This is the random seed for texture generation. Using the same seed will produce
   * identical textures. This parameter is an integer and is randomly chosen if not set.
   * If you want a model with different textures, please use same seed and different
   * texture_seed.
   */
  texture_seed?: number;
};

/**
 * Options for creating a 3D model from an image, with optional texturing,
 * rigging and animation.
 */
export type ImageTo3DInput = {
  /**
   * Animation preset ID from Meshy's library (500+ presets). Only used when
   * enable_animation is true. See https://docs.meshy.ai/en/api/animation-library
   * for available action IDs. Default value: `1001`
   */
  animation_action_id?: number;
  /** * Apply an animation preset to the rigged model. Requires enable_rigging to be true.
*/
  enable_animation?: boolean;
  /** Generate PBR Maps (metallic, roughness, normal) in addition to base color. */
  enable_pbr?: boolean;
  /**
   * Automatically rig the generated model as a humanoid character. Includes basic
   * walking and running animations. Best results with humanoid characters that have
   * clearly defined limbs.
   */
  enable_rigging?: boolean;
  /** If set to true, input data will be checked for safety before processing. Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * Image URL or base64 data URI for 3D model creation. Supports .jpg, .jpeg, and
   * .png formats. Also supports AVIF and HEIF formats which will be automatically
   * converted.
   */
  image_url: string | Blob | File;
  /**
   * Pose mode for the generated model. 'a-pose' generates an A-pose, 't-pose'
   * generates a T-pose, empty string for no specific pose. Default value: `""`
   */
  pose_mode?: "a-pose" | "t-pose" | "";
  /**
   * Approximate height of the character in meters. Only used when enable_rigging
   * is true. Default value: `1.7`
   */
  rigging_height_meters?: number;
  /** Whether to enable the remesh phase. Default value: `true` */
  should_remesh?: boolean;
  /** Whether to generate textures. Default value: `true` */
  should_texture?: boolean;
  /**
   * Controls symmetry behavior during model generation. Off disables symmetry,
   * Auto determines it automatically, On enforces symmetry. Default value: `"auto"`
   */
  symmetry_mode?: "off" | "auto" | "on";
  /** Target number of polygons in the generated model. Default value: `30000` */
  target_polycount?: number;
  /** 2D image to guide the texturing process. */
  texture_image_url?: string | Blob | File;
  /** Text prompt to guide the texturing process. */
  texture_prompt?: string;
  /**
   * Specify the topology of the generated model. Quad for smooth surfaces,
   * Triangle for detailed geometry. Default value: `"triangle"`
   */
  topology?: "quad" | "triangle";
};

/**
 * Result of a 3D model generation: the GLB model and format URLs, plus optional
 * rigging, animation, texture and thumbnail files.
 */
export type ImageTo3DOutput = {
  /** * Animated 3D model in FBX format. Only present when enable_animation is true.
*/
  animation_fbx?: File;
  /** Animated 3D model in GLB format. Only present when enable_animation is true. */
  animation_glb?: File;
  /** Basic walking and running animations. Only present when enable_rigging is true. */
  basic_animations?: BasicAnimations;
  /** Generated 3D object in GLB format. */
  model_glb: File;
  /** URLs for different 3D model formats. */
  model_urls: ModelUrls;
  /** Rigging task ID. Only present when enable_rigging is true. */
  rig_task_id?: string;
  /** Rigged character in FBX format. Only present when enable_rigging is true. */
  rigged_character_fbx?: File;
  /** Rigged character in GLB format. Only present when enable_rigging is true. */
  rigged_character_glb?: File;
  /** The seed used for generation (if available). */
  seed?: number;
  /**
   * Array of texture file objects, matching Meshy API structure.
   * NOTE(review): the element type was missing (bare `Array` does not compile)
   * and cannot be determined from this part of the file; `unknown` keeps the
   * declaration valid until the project's texture-file type is restored.
   */
  texture_urls?: Array<unknown>;
  /** Preview thumbnail of the generated model. */
  thumbnail?: File;
};

/**
 * Options for image-to-image generation guided by a single control image.
 */
export type ImageToImageControlNetInput = {
  /** The URL of the control image. */
  control_image_url: string | Blob | File;
  /** The scale of the controlnet conditioning. Default value: `0.5` */
  controlnet_conditioning_scale?: number;
  /** If set to true, the safety checker will be enabled. */
  enable_safety_checker?: boolean;
  /** If set to true, the prompt will be expanded with additional prompts. */
  expand_prompt?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you.
   * Default value: `7.5`
   */
  guidance_scale?: number;
  /** The size of the generated image. Leave it none to automatically infer from the control image. */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /** The URL of the image to use as a starting point for the generation. */
  image_url: string | Blob | File;
  /** * The list of LoRA weights to use.
*/
  // NOTE(review): the element type was missing (bare `Array` does not compile) and
  // cannot be determined from this part of the file; restore the project's LoRA weight type.
  loras?: Array<unknown>;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `35` */
  num_inference_steps?: number;
  /** The prompt to use for generating the image. Be as descriptive as possible for best results. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /** Determines how much the generated image resembles the initial image. Default value: `0.95` */
  strength?: number;
  /**
   * If set to true, the function will wait for the image to be generated and uploaded
   * before returning the response. This will increase the latency of the function but
   * it allows you to get the image directly in the response without going through the CDN.
   */
  sync_mode?: boolean;
};

/**
 * Options for image-to-image generation guided by any combination of control
 * images (canny, depth, normal, openpose, segmentation, teed), each with its
 * own preprocess switch.
 */
export type ImageToImageControlNetUnionInput = {
  /** The URL of the control image. */
  canny_image_url?: string | Blob | File;
  /** Whether to preprocess the canny image. Default value: `true` */
  canny_preprocess?: boolean;
  /** The scale of the controlnet conditioning. Default value: `0.5` */
  controlnet_conditioning_scale?: number;
  /** If set to true, the output cropped to the proper aspect ratio after generating. */
  crop_output?: boolean;
  /** The URL of the control image. */
  depth_image_url?: string | Blob | File;
  /** Whether to preprocess the depth image. Default value: `true` */
  depth_preprocess?: boolean;
  /**
   * The list of embeddings to use.
   * NOTE(review): the element type was missing (bare `Array` does not compile) and
   * cannot be determined from this part of the file; restore the project's embedding type.
   */
  embeddings?: Array<unknown>;
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** If set to true, the prompt will be expanded with additional prompts. */
  expand_prompt?: boolean;
  /** The format of the generated image. Default value: `"jpeg"` */
  format?: "jpeg" | "png";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you.
   * Default value: `7.5`
   */
  guidance_scale?: number;
  /** The size of the generated image. Leave it none to automatically infer from the control image. */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /** The URL of the image to use as a starting point for the generation. */
  image_url: string | Blob | File;
  /**
   * The list of LoRA weights to use.
   * NOTE(review): the element type was missing (bare `Array` does not compile) and
   * cannot be determined from this part of the file; restore the project's LoRA weight type.
   */
  loras?: Array<unknown>;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /** The URL of the control image. */
  normal_image_url?: string | Blob | File;
  /** Whether to preprocess the normal image. Default value: `true` */
  normal_preprocess?: boolean;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `35` */
  num_inference_steps?: number;
  /** The URL of the control image. */
  openpose_image_url?: string | Blob | File;
  /** Whether to preprocess the openpose image. Default value: `true` */
  openpose_preprocess?: boolean;
  /**
   * If set to true, the aspect ratio of the generated image will be preserved even
   * if the image size is too large. However, if the image is not a multiple of 32
   * in width or height, it will be resized to the nearest multiple of 32. By default,
   * this snapping to the nearest multiple of 32 will not preserve the aspect ratio.
   * Set crop_output to True, to crop the output to the proper aspect ratio
   * after generating.
   */
  preserve_aspect_ratio?: boolean;
  /** The prompt to use for generating the image. Be as descriptive as possible for best results. */
  prompt: string;
  /**
   * An id bound to a request, can be used with response to identify the request
   * itself. Default value: `""`
   */
  request_id?: string;
  /**
   * The version of the safety checker to use. v1 is the default CompVis safety checker.
   * v2 uses a custom ViT model. Default value: `"v1"`
   */
  safety_checker_version?: "v1" | "v2";
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /** The URL of the control image. */
  segmentation_image_url?: string | Blob | File;
  /** Whether to preprocess the segmentation image. Default value: `true` */
  segmentation_preprocess?: boolean;
  /** Determines how much the generated image resembles the initial image. Default value: `0.95` */
  strength?: number;
  /** If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */
  sync_mode?: boolean;
  /** The URL of the control image. */
  teed_image_url?: string | Blob | File;
  /** Whether to preprocess the teed image. Default value: `true` */
  teed_preprocess?: boolean;
};

/**
 * Options for image-to-image generation from a prompt and a starting image,
 * with an optional refiner pass.
 */
export type ImageToImageFooocusInput = {
  /**
   * The list of embeddings to use.
   * NOTE(review): the element type was missing (bare `Array` does not compile) and
   * cannot be determined from this part of the file; restore the project's embedding type.
   */
  embeddings?: Array<unknown>;
  /** If set to true, a smaller model will try to refine the output after it was processed. Default value: `true` */
  enable_refiner?: boolean;
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** If set to true, the prompt will be expanded with additional prompts. Default value: `true` */
  expand_prompt?: boolean;
  /** The format of the generated image. Default value: `"jpeg"` */
  format?: "jpeg" | "png";
  /** * The rescale factor for the CFG.
*/ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** * The size of the generated image. Leave it none to automatically infer from the prompt image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `8` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; }; /** * Input shape requiring `image_url` and `prompt`; all other fields are optional. */ export type ImageToImageInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image.
*/ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of the image to use for inpainting or img2img. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. */ loras?: Array; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ strength?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; /** * Input shape requiring `image_url` and `prompt`; all other fields are optional. */ export type ImageToImageLCMInput = { /** * If set to true, the output is cropped to the proper aspect ratio after generating. */ crop_output?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The rescale factor for the CFG.
*/ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` */ model_name?: "stabilityai/stable-diffusion-xl-base-1.0" | "runwayml/stable-diffusion-v1-5"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `6` */ num_inference_steps?: number; /** * If set to true, the aspect ratio of the generated image will be preserved even * if the image size is too large. However, if the image is not a multiple of 32 * in width or height, it will be resized to the nearest multiple of 32. By default, * this snapping to the nearest multiple of 32 will not preserve the aspect ratio. * Set crop_output to True, to crop the output to the proper aspect ratio * after generating. */ preserve_aspect_ratio?: boolean; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker.
v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. Default value: `true` */ sync_mode?: boolean; }; /** * Input shape requiring `image_url` and `prompt`; all other fields are optional. */ export type ImageToImageLightningInput = { /** * If set to true, the output is cropped to the proper aspect ratio after generating. */ crop_output?: boolean; /** * The list of embeddings to use. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform, given as a string literal. Default value: `"4"` */ num_inference_steps?: "1" | "2" | "4" | "8"; /** * If set to true, the aspect ratio of the generated image will be preserved even * if the image size is too large. However, if the image is not a multiple of 32 * in width or height, it will be resized to the nearest multiple of 32. By default, * this snapping to the nearest multiple of 32 will not preserve the aspect ratio. * Set crop_output to True, to crop the output to the proper aspect ratio * after generating.
*/ preserve_aspect_ratio?: boolean; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input shape requiring `image_url` and `prompt`; all other fields are optional. */ export type ImageToImagePlaygroundv25Input = { /** * If set to true, the output is cropped to the proper aspect ratio after generating. */ crop_output?: boolean; /** * The list of embeddings to use. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The rescale factor for the CFG. */ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation.
*/ image_url: string | Blob | File; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * If set to true, the aspect ratio of the generated image will be preserved even * if the image size is too large. However, if the image is not a multiple of 32 * in width or height, it will be resized to the nearest multiple of 32. By default, * this snapping to the nearest multiple of 32 will not preserve the aspect ratio. * Set crop_output to True, to crop the output to the proper aspect ratio * after generating. */ preserve_aspect_ratio?: boolean; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; }; /** * Input shape requiring `image_url` and `prompt`; all other fields are optional. */ export type ImageToImageSD15Input = { /** * If set to true, the output is cropped to the proper aspect ratio after generating. */ crop_output?: boolean; /** * The list of embeddings to use. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled.
Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use. */ loras?: Array; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * If set to true, the aspect ratio of the generated image will be preserved even * if the image size is too large. However, if the image is not a multiple of 32 * in width or height, it will be resized to the nearest multiple of 32. By default, * this snapping to the nearest multiple of 32 will not preserve the aspect ratio. * Set crop_output to True, to crop the output to the proper aspect ratio * after generating. */ preserve_aspect_ratio?: boolean; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself.
Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input shape requiring `image_url` and `prompt`; all other fields are optional. */ export type ImageToImageTurboInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. */ guidance_scale?: number; /** * The size of the generated image. Defaults to landscape_4_3 if no controlnet has been passed, otherwise defaults to the size of the controlnet conditioning image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of the image for image-to-image. */ image_url: string | Blob | File; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `4` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from.
*/ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Strength for Image-to-Image. Default value: `0.83` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input shape requiring `image_url` and `prompt`; the sampling fields are optional. */ export type ImageToLottieInput = { /** * URL of the reference image to animate. */ image_url: string | Blob | File; /** * Maximum number of Lottie tokens to generate. Default value: `4096` */ max_tokens?: number; /** * Text description guiding the animation of the image. */ prompt: string; /** * Sampling temperature for generation. Default value: `0.9` */ temperature?: number; /** * Top-k sampling parameter. Default value: `5` */ top_k?: number; /** * Nucleus sampling probability threshold. Default value: `0.25` */ top_p?: number; }; /** * Input shape requiring `prompt` and `reference_image_urls`; all other fields are optional. */ export type ImageToSVGInput = { /** * Enable input safety checking. Default value: `true` */ enable_safety_checker?: boolean; /** * Fill color for the generated glyphs. Accepts any valid SVG/CSS color value. Default value: `"black"` */ fill_color?: string; /** * Maximum tokens to generate. Default value: `8192` */ max_tokens?: number; /** * Maximum dimension (width or height) of the output SVG in pixels. The aspect ratio is preserved. Default value: `512` */ output_size?: number; /** * The target text to generate as vector glyphs, styled to match the reference images. */ prompt: string; /** * URLs of reference glyph images whose style should be matched. Provide 1 to 8 images of individual glyph characters. Each entry uses the same `string | Blob | File` shape as the other URL fields. */ reference_image_urls: Array<string | Blob | File>; /** * Repetition penalty. Default value: `1` */ repetition_penalty?: number; /** * Random seed for reproducibility. */ seed?: number; /** * Optional stroke (outline) color for the generated glyphs. When set, adds an outline around each glyph path. */ stroke_color?: string; /** * Stroke width in SVG units.
Only applies when stroke_color is set. Default value: `1` */ stroke_width?: number; /** * Sampling temperature. Default value: `0.1` */ temperature?: number; /** * Top-k sampling parameter. Default value: `5` */ top_k?: number; /** * Top-p (nucleus) sampling parameter. Default value: `0.95` */ top_p?: number; }; /** * Input shape requiring `image_url` and `prompt`; all other fields are optional. */ export type ImageToVideoInput = { /** * URL of the audio to use as the background music. Must be publicly accessible. * Limit handling: If the audio duration exceeds the duration value (5, 10, or 15 seconds), * the audio is truncated to the first N seconds, and the rest is discarded. If * the audio is shorter than the video, the remaining part of the video will be silent. * For example, if the audio is 3 seconds long and the video duration is 5 seconds, the * first 3 seconds of the output video will have sound, and the last 2 seconds will be silent. * - Format: WAV, MP3. * - Duration: 3 to 30 s. * - File size: Up to 15 MB. */ audio_url?: string | Blob | File; /** * Duration of the generated video in seconds, given as a string literal. Choose between 5, 10 or 15 seconds. Default value: `"5"` */ duration?: "5" | "10" | "15"; /** * Whether to enable prompt rewriting using LLM. Default value: `true` */ enable_prompt_expansion?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * URL of the image to use as the first frame. Must be publicly accessible or base64 data URI. Image dimensions must be between 240 and 7680. */ image_url: string | Blob | File; /** * When true, enables intelligent multi-shot segmentation. Only active when enable_prompt_expansion is True. Set to false for single-shot generation. */ multi_shots?: boolean; /** * Negative prompt to describe content to avoid. Max 500 characters. Default value: `""` */ negative_prompt?: string; /** * The text prompt describing the desired video motion. Max 800 characters. */ prompt: string; /** * Video resolution.
Valid values: 720p, 1080p Default value: `"1080p"` */ resolution?: "720p" | "1080p"; /** * Random seed for reproducibility. If None, a random seed is chosen. */ seed?: number; }; /** * Input shape requiring `image_url` and `prompt`; all other fields are optional. */ export type ImageToVideov21Input = { /** * The duration of the generated video in seconds. Default value: `5` */ duration?: number; /** * Input image, given as a URL string, Blob, or File. */ image_url: string | Blob | File; /** * A negative prompt to guide the model. Default value: `""` */ negative_prompt?: string; /** * Text prompt (presumably describing the desired video; confirm against the endpoint docs). */ prompt: string; /** * The resolution of the generated video. Default value: `"720p"` */ resolution?: "720p" | "1080p"; /** * The seed for the random number generator. */ seed?: number; }; /** * Output shape with a single required `nsfw_probability` field. */ export type ImageutilsNsfwOutput = { /** * The probability of the image being NSFW. */ nsfw_probability: number; }; /** * Input shape requiring `image_url`; `crop_to_bbox` and `sync_mode` are optional. */ export type ImageutilsRembgInput = { /** * If set to true, the resulting image will be cropped to a bounding box around the subject. */ crop_to_bbox?: boolean; /** * Input image URL. */ image_url: string | Blob | File; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input shape with two required fields: `image_url` and `text_input`. */ export type ImageWithTextInput = { /** * The URL of the image to be processed. */ image_url: string | Blob | File; /** * Text input for the task. */ text_input: string; }; /** * Input shape with two required fields: `image_url` and `region`. */ export type ImageWithUserCoordinatesInput = { /** * The URL of the image to be processed.
*/ image_url: string | Blob | File; /** * The user input coordinates. */ region: Region; }; /** * Output shape with a single required `images` array. */ export type Imagineart15PreviewTextToImageOutput = { /** * Generated image */ images: Array; }; /** * Input shape requiring `audio_url` and `impulse_response_url`; the mix, loudness and bitrate fields are optional. */ export type ImpulseResponseInput = { /** * URL of the main audio file to process. */ audio_url: string | Blob | File; /** * Level of the original (dry) signal in the mix (0.0-1.0). Default value: `0.7` */ dry_level?: number; /** * URL of the impulse response WAV file (reverb/effect profile). */ impulse_response_url: string | Blob | File; /** * Target integrated loudness in LUFS (typically -24 to -14). Default value: `-18` */ loudness_i?: number; /** * Loudness Range target in LU (typically 5-15). Default value: `8` */ loudness_lra?: number; /** * Maximum true peak in dBTP (typically -2 to -1). Default value: `-1.5` */ loudness_tp?: number; /** * Output audio bitrate. Default value: `"192k"` */ output_bitrate?: "128k" | "192k" | "256k" | "320k"; /** * Level of the processed (wet) signal in the mix (0.0-1.0). Default value: `0.3` */ wet_level?: number; }; /** * Input shape requiring `audio_url` and `prompt`; the emotion-related fields and `strength` are optional. */ export type IndexTts2TextToSpeechInput = { /** * The audio file to generate the speech from. */ audio_url: string | Blob | File; /** * The emotional prompt to influence the emotional style. Must be used together with should_use_prompt_for_emotion. */ emotion_prompt?: string; /** * The emotional reference audio file to extract the style from. */ emotional_audio_url?: string | Blob | File; /** * The strengths of individual emotions for fine-grained control. */ emotional_strengths?: EmotionalStrengths; /** * The speech prompt to generate. */ prompt: string; /** * Whether to use the `prompt` to calculate emotional strengths; if enabled, it will overwrite the `emotional_strengths` values. If `emotion_prompt` is provided, it will be used instead of `prompt` to extract the emotional style. */ should_use_prompt_for_emotion?: boolean; /** * The strength of the emotional style transfer. Higher values result in stronger emotional influence.
Default value: `1` */ strength?: number; }; /** * Input shape requiring `audio_url`, `image_url` and `prompt`; all other fields are optional. NOTE(review): the type name is lower camelCase, unlike its PascalCase siblings; renaming it would break existing importers. */ export type infinitalkInput = { /** * The acceleration level to use for generation. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * The URL of the audio file. */ audio_url: string | Blob | File; /** * URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped. */ image_url: string | Blob | File; /** * Number of frames to generate. Must be between 41 and 721. Default value: `145` */ num_frames?: number; /** * The text prompt to guide video generation. */ prompt: string; /** * Resolution of the video to generate. Must be either 480p or 720p. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * Random seed for reproducibility. If None, a random seed is chosen. Default value: `42` */ seed?: number; }; /** * Input shape requiring `audio_url`, `prompt` and `video_url`; all other fields are optional. */ export type InfinitalkVideoToVideoInput = { /** * The acceleration level to use for generation. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * The URL of the audio file. */ audio_url: string | Blob | File; /** * Number of frames to generate. Must be between 81 and 129 (inclusive). If the number of frames is greater than 81, the video will be generated with 1.25x more billing units. Default value: `145` (NOTE(review): this default lies outside the documented 81-129 range; confirm which is correct.) */ num_frames?: number; /** * The text prompt to guide video generation. */ prompt: string; /** * Resolution of the video to generate. Must be either 480p or 720p. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * Random seed for reproducibility. If None, a random seed is chosen. Default value: `42` */ seed?: number; /** * URL of the input video. */ video_url: string | Blob | File; }; /** * Input shape requiring only `prompt`; all other fields are optional. */ export type InfinityStarTextToVideoInput = { /** * Aspect ratio of the generated output. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "1:1" | "9:16"; /** * Whether to use an LLM to enhance the prompt.
Default value: `true` */ enhance_prompt?: boolean; /** * Guidance scale for generation. Default value: `7.5` */ guidance_scale?: number; /** * Negative prompt to guide what to avoid in generation. Default value: `""` */ negative_prompt?: string; /** * Number of inference steps. Default value: `50` */ num_inference_steps?: number; /** * Text prompt for generating the video. */ prompt: string; /** * Random seed for reproducibility. Leave empty for random generation. */ seed?: number; /** * Tau value for video scale. Default value: `0.4` */ tau_video?: number; /** * Whether to use APG. Default value: `true` */ use_apg?: boolean; }; /** * Input shape requiring `control_image_url`, `image_url`, `mask_url` and `prompt`; all other fields are optional. */ export type InpaintingControlNetInput = { /** * The URL of the control image. */ control_image_url: string | Blob | File; /** * The scale of the controlnet conditioning. Default value: `0.5` */ controlnet_conditioning_scale?: number; /** * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The size of the generated image. Leave it none to automatically infer from the control image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use. */ loras?: Array; /** * The URL of the mask to use for inpainting. */ mask_url: string | Blob | File; /** * The negative prompt to use.Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution).
Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `35` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; /** * Input shape requiring `image_url`, `mask_url` and `prompt`; every control-image field and all other fields are optional. */ export type InpaintingControlNetUnionInput = { /** * The URL of the control image. */ canny_image_url?: string | Blob | File; /** * Whether to preprocess the canny image. Default value: `true` */ canny_preprocess?: boolean; /** * The scale of the controlnet conditioning. Default value: `0.5` */ controlnet_conditioning_scale?: number; /** * The URL of the control image. */ depth_image_url?: string | Blob | File; /** * Whether to preprocess the depth image. Default value: `true` */ depth_preprocess?: boolean; /** * The list of embeddings to use. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you.
Default value: `7.5` */ guidance_scale?: number; /** * The size of the generated image. Leave it none to automatically infer from the control image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use. */ loras?: Array; /** * The URL of the mask to use for inpainting. */ mask_url: string | Blob | File; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The URL of the control image. */ normal_image_url?: string | Blob | File; /** * Whether to preprocess the normal image. Default value: `true` */ normal_preprocess?: boolean; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `35` */ num_inference_steps?: number; /** * The URL of the control image. */ openpose_image_url?: string | Blob | File; /** * Whether to preprocess the openpose image. Default value: `true` */ openpose_preprocess?: boolean; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The URL of the control image.
*/ segmentation_image_url?: string | Blob | File; /** * Whether to preprocess the segmentation image. Default value: `true` */ segmentation_preprocess?: boolean; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The URL of the control image. */ teed_image_url?: string | Blob | File; /** * Whether to preprocess the teed image. Default value: `true` */ teed_preprocess?: boolean; }; /** * Input shape requiring `image_url`, `mask_url` and `prompt`; all other fields are optional. */ export type InpaintingFooocusInput = { /** * The list of embeddings to use. */ embeddings?: Array; /** * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true` */ enable_refiner?: boolean; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. Default value: `true` */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The rescale factor for the CFG. */ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The URL of the mask to use for inpainting. */ mask_url: string | Blob | File; /** * The negative prompt to use.Use it to address details that you don't want * in the image.
This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `8`
   */
  num_inference_steps?: number;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
   */
  safety_checker_version?: "v1" | "v2";
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /**
   * Determines how much the generated image resembles the initial image. Default value: `0.95`
   */
  strength?: number;
};

/**
 * Inpainting request: a source image, a mask, a prompt and optional
 * generation settings (including embeddings and LoRA weights).
 */
export type InpaintingInput = {
  /**
   * The list of embeddings to use.
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended item type is confirmed.
   */
  embeddings?: Array<unknown>;
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * If set to true, the prompt will be expanded with additional prompts.
   */
  expand_prompt?: boolean;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  format?: "jpeg" | "png";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The URL of the image to use as a starting point for the generation.
   */
  image_url: string | Blob | File;
  /**
   * The list of LoRA weights to use.
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended item type is confirmed.
   */
  loras?: Array<unknown>;
  /**
   * The URL of the mask to use for inpainting.
*/
  mask_url: string | Blob | File;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `25`
   */
  num_inference_steps?: number;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * An id bound to a request, can be used with response to identify the request
   * itself. Default value: `""`
   */
  request_id?: string;
  /**
   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
   */
  safety_checker_version?: "v1" | "v2";
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /**
   * Determines how much the generated image resembles the initial image. Default value: `0.95`
   */
  strength?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Inpainting request: a source image, a mask, a prompt and optional
 * generation settings, including a choice of base model.
 */
export type InpaintingLCMInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * If set to true, the prompt will be expanded with additional prompts.
   */
  expand_prompt?: boolean;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  format?: "jpeg" | "png";
  /**
   * The rescale factor for the CFG.
   */
  guidance_rescale?: number;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image.
Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The URL of the image to use as a starting point for the generation.
   */
  image_url: string | Blob | File;
  /**
   * The URL of the mask to use for inpainting.
   */
  mask_url: string | Blob | File;
  /**
   * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"`
   */
  model_name?:
    | "stabilityai/stable-diffusion-xl-base-1.0"
    | "runwayml/stable-diffusion-v1-5";
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `6`
   */
  num_inference_steps?: number;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * An id bound to a request, can be used with response to identify the request
   * itself. Default value: `""`
   */
  request_id?: string;
  /**
   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
   */
  safety_checker_version?: "v1" | "v2";
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /**
   * Determines how much the generated image resembles the initial image. Default value: `0.95`
   */
  strength?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. Default value: `true`
   */
  sync_mode?: boolean;
};

/**
 * Inpainting request: a source image, a mask, a prompt and optional
 * generation settings; the step count is restricted to a fixed set of values.
 */
export type InpaintingLightningInput = {
  /**
   * The list of embeddings to use.
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended item type is confirmed.
   */
  embeddings?: Array<unknown>;
  /**
   * If set to true, the safety checker will be enabled.
Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * If set to true, the prompt will be expanded with additional prompts.
   */
  expand_prompt?: boolean;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  format?: "jpeg" | "png";
  /**
   * The size of the generated image. Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The URL of the image to use as a starting point for the generation.
   */
  image_url: string | Blob | File;
  /**
   * The URL of the mask to use for inpainting.
   */
  mask_url: string | Blob | File;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `"4"`
   */
  num_inference_steps?: "1" | "2" | "4" | "8";
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * An id bound to a request, can be used with response to identify the request
   * itself. Default value: `""`
   */
  request_id?: string;
  /**
   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
   */
  safety_checker_version?: "v1" | "v2";
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /**
   * Determines how much the generated image resembles the initial image. Default value: `0.95`
   */
  strength?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Inpainting request: a source image, a mask, a prompt and optional
 * generation settings.
 */
export type InpaintingPlaygroundv25Input = {
  /**
   * The list of embeddings to use.
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended item type is confirmed.
   */
  embeddings?: Array<unknown>;
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * If set to true, the prompt will be expanded with additional prompts.
*/
  expand_prompt?: boolean;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  format?: "jpeg" | "png";
  /**
   * The rescale factor for the CFG.
   */
  guidance_rescale?: number;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `3`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The URL of the image to use as a starting point for the generation.
   */
  image_url: string | Blob | File;
  /**
   * The URL of the mask to use for inpainting.
   */
  mask_url: string | Blob | File;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `25`
   */
  num_inference_steps?: number;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * An id bound to a request, can be used with response to identify the request
   * itself. Default value: `""`
   */
  request_id?: string;
  /**
   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
   */
  safety_checker_version?: "v1" | "v2";
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
*/
  seed?: number;
  /**
   * Determines how much the generated image resembles the initial image. Default value: `0.95`
   */
  strength?: number;
};

/**
 * Image inpainting request that names its base model explicitly: source
 * image, mask, model name and prompt are required.
 */
export type inpaintInput = {
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
   */
  guidance_scale?: number;
  /**
   * Input image for img2img or inpaint mode.
   */
  image_url: string | Blob | File;
  /**
   * Input mask for inpaint mode. Black areas will be preserved, white areas will be inpainted.
   */
  mask_url: string | Blob | File;
  /**
   * URL or HuggingFace ID of the base model to generate the image.
   */
  model_name: string;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Increasing the amount of steps tells Stable Diffusion that it should take more steps
   * to generate your final result which can increase the amount of detail in your image. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
};

/**
 * Audio inpainting request: an existing audio file, the lyrics to sing and
 * the section(s) of the track to regenerate.
 */
export type InpaintInput = {
  /**
   * The URL of the audio file to alter. Must be a valid publicly accessible URL.
   */
  audio_url: string | Blob | File;
  /**
   * Greater means more natural vocals. Lower means sharper instrumentals. We recommend 0.7. Default value: `0.7`
   */
  balance_strength?: number;
  /**
   * The lyrics sung in the generated song. An empty string will generate an instrumental track.
   */
  lyrics_prompt: string;
  /**
   * Generating 2 songs costs 1.5x the price of generating 1 song.
Also, note that using the same seed may not result in identical songs if the number of songs generated is changed. Default value: `1`
   */
  num_songs?: number;
  /**
   * The bit rate to use for mp3 and m4a formats. Not available for other formats.
   */
  output_bit_rate?: "128" | "192" | "256" | "320";
  /**
   * Default value: `"wav"`
   */
  output_format?: "flac" | "mp3" | "wav" | "ogg" | "m4a";
  /**
   * Controls how strongly your prompt influences the output. Greater values adhere more to the prompt but sound less natural. (This is CFG.) Default value: `2`
   */
  prompt_strength?: number;
  /**
   * List of sections to inpaint. Currently, only one section is supported so the list length must be 1.
   */
  sections: Array<InpaintSection>;
  /**
   * The seed to use for generation. Will pick a random seed if not provided. Repeating a request with identical parameters (must use lyrics and tags, not prompt) and the same seed will generate the same song.
   */
  seed?: number;
  /**
   * Crop to the selected region.
   */
  selection_crop?: boolean;
  /**
   * Tags/styles of the music to generate. You can view a list of all available tags at https://sonauto.ai/tag-explorer.
   */
  tags?: Array<string>;
};

/**
 * Result of an audio inpainting request: the generated audio plus the seed
 * that produced it.
 */
export type InpaintOutput = {
  /**
   * The generated audio files.
   * NOTE(review): the element type was missing from this declaration; `File` is
   * assumed to match the other generated-media outputs in this file — confirm.
   */
  audio: Array<File>;
  /**
   * The seed used for generation. This can be used to generate an identical song by passing the same parameters with this seed in a future request.
   */
  seed: number;
};

/**
 * A time range, in seconds, selecting the part of the audio to inpaint.
 */
export type InpaintSection = {
  /**
   * End time in seconds of the section to inpaint.
   */
  end: number;
  /**
   * Start time in seconds of the section to inpaint.
   */
  start: number;
};

/**
 * Image inpainting request: a source image, a mask image, a prompt and
 * optional generation settings.
 */
export type InpaintTurboInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you.
   */
  guidance_scale?: number;
  /**
   * The size of the generated image.
Defaults to landscape_4_3 if no controlnet has been passed, otherwise defaults to the size of the controlnet conditioning image.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * URL of Image for inpainting.
   */
  image_url: string | Blob | File;
  /**
   * URL of mask image for inpainting.
   */
  mask_image_url: string | Blob | File;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `4`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * Strength for Image-to-Image. Default value: `0.83`
   */
  strength?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Request carrying the tracks to combine into a single piece of media.
 */
export type Input = {
  /**
   * List of tracks to be combined into the final media.
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended track type is confirmed.
   */
  tracks: Array<unknown>;
};

/**
 * Image generation request driven by a prompt and a subject image.
 */
export type InstantCharacterInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image.
Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The image URL to generate an image from. Needs to match the dimensions of the mask.
   */
  image_url: string | Blob | File;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The scale of the subject image. Higher values will make the subject image more prominent in the generated image. Default value: `1`
   */
  scale?: number;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
};

/**
 * Request listing the videos to interleave, in order.
 */
export type InterleaveVideoInput = {
  /**
   * List of video URLs to interleave in order.
   */
  video_urls: Array<string>;
};

/**
 * Request to either embed an invisible watermark into an image or decode one
 * from it, selected by `decode`.
 */
export type InvisibleWatermarkInput = {
  /**
   * Whether to decode a watermark from the image instead of encoding.
   */
  decode?: boolean;
  /**
   * URL of image to be watermarked or decoded.
   */
  image_url: string | Blob | File;
  /**
   * Length of watermark bits to decode (required when decode=True).
   */
  length?: number;
  /**
   * Text to use as watermark (for encoding only). Default value: `"watermark"`
   */
  watermark?: string;
};

/**
 * Result of a watermark request; which fields are present depends on whether
 * the request encoded or decoded.
 */
export type InvisibleWatermarkOutput = {
  /**
   * The extracted watermark text (when decoding).
   */
  extracted_watermark?: string;
  /**
   * The watermarked image file info (when encoding).
   */
  image?: Image;
  /**
   * Length of the watermark bits used (helpful for future decoding).
   */
  length?: number;
};

/**
 * Text-to-speech request: the text to speak plus optional voice and sample
 * rate.
 */
export type InworldTtsInput = {
  /**
   * The sample rate in Hz for the output audio. Default value: `"48000"`
   */
  sample_rate_hertz?:
    | "8000"
    | "16000"
    | "24000"
    | "32000"
    | "40000"
    | "48000";
  /**
   * The text to synthesize into speech.
   */
  text: string;
  /**
   * The voice to use for synthesis.
Default value: `"Craig (en)"`
   */
  voice?:
    // (en)
    | "Loretta (en)" | "Darlene (en)" | "Marlene (en)" | "Hank (en)"
    | "Evelyn (en)" | "Celeste (en)" | "Pippa (en)" | "Tessa (en)"
    | "Liam (en)" | "Callum (en)" | "Hamish (en)" | "Abby (en)"
    | "Graham (en)" | "Rupert (en)" | "Mortimer (en)" | "Snik (en)"
    | "Anjali (en)" | "Saanvi (en)" | "Arjun (en)" | "Claire (en)"
    | "Oliver (en)" | "Simon (en)" | "Elliot (en)" | "James (en)"
    | "Serena (en)" | "Gareth (en)" | "Vinny (en)" | "Lauren (en)"
    | "Jessica (en)" | "Ethan (en)" | "Tyler (en)" | "Jason (en)"
    | "Chloe (en)" | "Veronica (en)" | "Victoria (en)" | "Miranda (en)"
    | "Sebastian (en)" | "Victor (en)" | "Malcolm (en)" | "Kayla (en)"
    | "Nate (en)" | "Jake (en)" | "Brian (en)" | "Amina (en)"
    | "Kelsey (en)" | "Derek (en)" | "Grant (en)" | "Evan (en)"
    | "Alex (en)" | "Ashley (en)" | "Craig (en)" | "Deborah (en)"
    | "Dennis (en)" | "Edward (en)" | "Elizabeth (en)" | "Hades (en)"
    | "Julia (en)" | "Pixie (en)" | "Mark (en)" | "Olivia (en)"
    | "Priya (en)" | "Ronald (en)" | "Sarah (en)" | "Shaun (en)"
    | "Theodore (en)" | "Timothy (en)" | "Wendy (en)" | "Dominus (en)"
    | "Hana (en)" | "Clive (en)" | "Carter (en)" | "Blake (en)"
    | "Luna (en)"
    // (zh)
    | "Yichen (zh)" | "Xiaoyin (zh)" | "Xinyi (zh)" | "Jing (zh)"
    // (nl)
    | "Erik (nl)" | "Katrien (nl)" | "Lennart (nl)" | "Lore (nl)"
    // (fr)
    | "Alain (fr)" | "Hélène (fr)" | "Mathieu (fr)" | "Étienne (fr)"
    // (de)
    | "Johanna (de)" | "Josef (de)"
    // (it)
    | "Gianni (it)" | "Orietta (it)"
    // (ja)
    | "Asuka (ja)" | "Satoshi (ja)"
    // (ko)
    | "Hyunwoo (ko)" | "Minji (ko)" | "Seojun (ko)" | "Yoona (ko)"
    // (pl)
    | "Szymon (pl)" | "Wojciech (pl)"
    // (pt)
    | "Heitor (pt)" | "Maitê (pt)"
    // (es)
    | "Diego (es)" | "Lupita (es)" | "Miguel (es)" | "Rafael (es)"
    // (ru)
    | "Svetlana (ru)" | "Elena (ru)" | "Dmitry (ru)" | "Nikolai (ru)"
    // (hi)
    | "Riya (hi)" | "Manoj (hi)"
    // (he)
    | "Yael (he)" | "Oren (he)"
    // (ar)
    | "Nour (ar)" | "Omar (ar)";
};

/**
 * Describes an IP-Adapter: where its weights and image encoder live, the
 * conditioning image, and how strongly to apply it.
 */
export type IPAdapter = {
  /**
   * Path to the Image Encoder for the IP-Adapter, for example 'openai/clip-vit-large-patch14'
   */
  image_encoder_path: string;
  /**
   * Subfolder in which the
image encoder weights exist.
   */
  image_encoder_subfolder?: string;
  /**
   * Name of the image encoder.
   */
  image_encoder_weight_name?: string;
  /**
   * URL of Image for IP-Adapter conditioning.
   */
  image_url: string | Blob | File;
  /**
   * URL of the mask for the control image.
   */
  mask_image_url?: string | Blob | File;
  /**
   * Threshold for mask. Default value: `0.5`
   */
  mask_threshold?: number;
  /**
   * Hugging Face path to the IP-Adapter.
   */
  path: string;
  /**
   * Scale for ip adapter.
   */
  scale: number;
  /**
   * Subfolder in which the ip_adapter weights exist.
   */
  subfolder?: string;
  /**
   * Name of the safetensors file containing the ip-adapter weights.
   */
  weight_name?: string;
};

/**
 * Image generation request conditioned on a face: a prompt plus either a
 * single face image or a zip archive of face images.
 */
export type IpAdapterFaceIdInput = {
  /**
   * The URL to the base 1.5 model. Default is SG161222/Realistic_Vision_V4.0_noVAE Default value: `"SG161222/Realistic_Vision_V4.0_noVAE"`
   */
  base_1_5_model_repo?: string;
  /**
   * The URL to the base SDXL model. Default is SG161222/RealVisXL_V3.0 Default value: `"SG161222/RealVisXL_V3.0"`
   */
  base_sdxl_model_repo?: string;
  /**
   * The size of the face detection model. The higher the number the more accurate
   * the detection will be but it will also take longer to run. The higher the number the more
   * likely it will fail to find a face as well. Lower it if you are having trouble
   * finding a face in the image. Default value: `640`
   */
  face_id_det_size?: number;
  /**
   * An image of a face to match. If an image with a size of 640x640 is not provided, it will be scaled and cropped to that size.
   */
  face_image_url?: string | Blob | File;
  /**
   * URL to zip archive with images of faces. The images embedding will be averaged to
   * create a more accurate face id.
   */
  face_images_data_url?: string | Blob | File;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
   */
  guidance_scale?: number;
  /**
   * The height of the generated image.
Default value: `512`
   */
  height?: number;
  /**
   * The model type to use. 1_5 is the default and is recommended for most use cases. Default value: `"1_5-v1"`
   */
  model_type?:
    | "1_5-v1"
    | "1_5-v1-plus"
    | "1_5-v2-plus"
    | "SDXL-v1"
    | "SDXL-v2-plus"
    | "1_5-auraface-v1";
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `"blurry, low resolution, bad, ugly, low quality, pixelated, interpolated, compression artifacts, noisey, grainy"`
   */
  negative_prompt?: string;
  /**
   * The number of inference steps to use for generating the image. The more steps
   * the better the image will be but it will also take longer to generate. Default value: `50`
   */
  num_inference_steps?: number;
  /**
   * The number of samples for face id. The more samples the better the image will
   * be but it will also take longer to generate. Default is 4. Default value: `4`
   */
  num_samples?: number;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /**
   * The width of the generated image. Default value: `512`
   */
  width?: number;
};

/**
 * Text-to-image request: a prompt plus optional sampling settings.
 */
export type janusInput = {
  /**
   * Classifier Free Guidance scale - how closely to follow the prompt. Default value: `5`
   */
  cfg_weight?: number;
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The size of the generated image. Default value: `square`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * Number of images to generate in parallel. Default value: `1`
   */
  num_images?: number;
  /**
   * The prompt to generate an image from.
*/
  prompt: string;
  /**
   * Random seed for reproducible generation.
   */
  seed?: number;
  /**
   * Controls randomness in the generation. Higher values make output more random. Default value: `1`
   */
  temperature?: number;
};

/**
 * Request for the JSON helper, which either extracts values at given paths
 * from a JSON string or converts a JSON value to a string, depending on `mode`.
 */
export type JsonExtractInput = {
  /**
   * In 'extract' mode: a JSON string to parse and extract from. In 'stringify' mode: any JSON value (object, array, number, etc.) to convert to a string.
   *
   * Typed as `unknown` because the accepted value is arbitrary JSON; `void`
   * would only admit `undefined` and contradict this contract.
   */
  json_input: unknown;
  /**
   * Operation mode. 'extract' parses json_input and extracts values at given paths. 'stringify' converts json_input to a JSON string. Default value: `"extract"`
   */
  mode?: "extract" | "stringify";
  /**
   * List of JSON paths to extract (only used in 'extract' mode). Use dot notation for nested keys and brackets for array indices.
   */
  paths?: Array<string>;
};

/**
 * Result of the JSON helper.
 */
export type JsonExtractOutput = {
  /**
   * In 'stringify' mode: the JSON string representation. Null in 'extract' mode.
   */
  text?: string;
  /**
   * In 'extract' mode: extracted values in the same order as input paths. In 'stringify' mode: single-element list containing the JSON string.
   * Elements are `unknown` because extracted values can be any JSON value.
   */
  values: Array<unknown>;
};

/**
 * Inpainting / image-to-image request: a source image, a mask, a prompt and
 * optional LoRAs and generation settings.
 */
export type JuggernautFluxLoraInpaintingInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * URL of image to use for inpainting or img2img.
   */
  image_url: string | Blob | File;
  /**
   * The LoRAs to use for the image generation. You can use any number of LoRAs
   * and they will be merged together to generate the final image.
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended item type is confirmed.
   */
  loras?: Array<unknown>;
  /**
   * The mask marking the area to inpaint.
   */
  mask_url: string | Blob | File;
  /**
   * The number of images to generate.
Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85`
   */
  strength?: number;
  /**
   * If set to true, the function will wait for the image to be generated and uploaded
   * before returning the response. This will increase the latency of the function but
   * it allows you to get the image directly in the response without going through the CDN.
   */
  sync_mode?: boolean;
};

/**
 * Image-to-video request: a reference image and a prompt, plus optional
 * duration, resolution and speed settings.
 */
export type Kandinsky5ProImageToVideoInput = {
  /**
   * Acceleration level for faster generation. Default value: `regular`
   */
  acceleration?: "none" | "regular";
  /**
   * Video duration. Default value: `"5s"`
   */
  duration?: string;
  /**
   * The URL of the image to use as a reference for the video generation.
   */
  image_url: string | Blob | File;
  /**
   * Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The prompt to generate the video from.
   */
  prompt: string;
  /**
   * Video resolution: 512p or 1024p. Default value: `"512P"`
   */
  resolution?: "512P" | "1024P";
};

/**
 * Text-to-video request: a prompt plus optional aspect ratio, duration,
 * resolution and speed settings.
 */
export type Kandinsky5ProTextToVideoInput = {
  /**
   * Acceleration level for faster generation. Default value: `regular`
   */
  acceleration?: "none" | "regular";
  /**
   * Aspect ratio of the generated video. One of (3:2, 1:1, 2:3). Default value: `"3:2"`
   */
  aspect_ratio?: "3:2" | "1:1" | "2:3";
  /**
   * The length of the video to generate. Default value: `"5s"`
   */
  duration?: string;
  /**
   * The number of inference steps.
Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The text prompt to guide video generation.
   */
  prompt: string;
  /**
   * Video resolution: 512p or 1024p. Default value: `"512P"`
   */
  resolution?: "512P" | "1024P";
};

/**
 * Text-to-video request: a prompt plus optional aspect ratio, duration and
 * resolution.
 */
export type Kandinsky5TextToVideoDistillInput = {
  /**
   * Aspect ratio of the generated video. One of (3:2, 1:1, 2:3). Default value: `"3:2"`
   */
  aspect_ratio?: "3:2" | "1:1" | "2:3";
  /**
   * The length of the video to generate (5s or 10s). Default value: `"5s"`
   */
  duration?: "5s" | "10s";
  /**
   * The text prompt to guide video generation.
   */
  prompt: string;
  /**
   * Resolution of the generated video in W:H format. Will be calculated based on the aspect ratio (768x512, 512x512, 512x768). Default value: `"768x512"`
   */
  resolution?: string;
};

/**
 * Text-to-video request: a prompt plus optional aspect ratio, duration,
 * step count and resolution.
 */
export type Kandinsky5TextToVideoInput = {
  /**
   * Aspect ratio of the generated video. One of (3:2, 1:1, 2:3). Default value: `"3:2"`
   */
  aspect_ratio?: "3:2" | "1:1" | "2:3";
  /**
   * The length of the video to generate (5s or 10s). Default value: `"5s"`
   */
  duration?: "5s" | "10s";
  /**
   * The number of inference steps. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * The text prompt to guide video generation.
   */
  prompt: string;
  /**
   * Resolution of the generated video in W:H format. Will be calculated based on the aspect ratio (768x512, 512x512, 512x768). Default value: `"768x512"`
   */
  resolution?: string;
};

/**
 * Result of a text-to-video request.
 */
export type Kandinsky5TextToVideoOutput = {
  /**
   * The generated video file.
   */
  video?: File;
};

/**
 * A media item placed on a timeline: where it starts, how long it lasts and
 * where its file can be fetched.
 */
export type Keyframe = {
  /**
   * The duration in milliseconds of this keyframe.
   */
  duration: number;
  /**
   * The timestamp in milliseconds where this keyframe starts.
   */
  timestamp: number;
  /**
   * The URL where this keyframe's media file can be accessed.
   */
  url: string;
};

/**
 * Settings for a single transition: its duration and an optional prompt
 * override.
 */
export type KeyframeTransition = {
  /**
   * Duration of this transition in seconds. Default value: `5`
   */
  duration?: number;
  /**
   * Specific prompt for this transition. Overrides the global prompt if provided.
*/
  prompt?: string;
};

/**
 * Image editing request: the images to edit and an edit prompt, plus
 * optional LoRA weights and generation settings.
 */
export type KleinBaseEditLoRAInput = {
  /**
   * The acceleration level to use for image generation. Default value: `"regular"`
   */
  acceleration?: "none" | "regular" | "high";
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Guidance scale for classifier-free guidance. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. If not provided, uses the input image size.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The URLs of the images for editing. A maximum of 4 images are allowed.
   */
  image_urls: Array<string>;
  /**
   * List of LoRA weights to apply (maximum 3).
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended item type is confirmed.
   */
  loras?: Array<unknown>;
  /**
   * Negative prompt for classifier-free guidance. Describes what to avoid in the image. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt to edit the image.
   */
  prompt: string;
  /**
   * The seed to use for the generation. If not provided, a random seed will be used.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI. Output is not stored when this is True.
   */
  sync_mode?: boolean;
};

/**
 * Text-to-image request: a prompt plus optional LoRA weights and generation
 * settings.
 */
export type KleinBaseLoRAInput = {
  /**
   * The acceleration level to use for image generation. Default value: `"regular"`
   */
  acceleration?: "none" | "regular" | "high";
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Guidance scale for classifier-free guidance. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * The size of the image to generate.
Default value: `landscape_4_3`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * List of LoRA weights to apply (maximum 3).
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended item type is confirmed.
   */
  loras?: Array<unknown>;
  /**
   * Negative prompt for classifier-free guidance. Describes what to avoid in the image. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The seed to use for the generation. If not provided, a random seed will be used.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI. Output is not stored when this is True.
   */
  sync_mode?: boolean;
};

/**
 * Text-to-image request: a prompt plus optional LoRA weights and generation
 * settings.
 */
export type KleinLoRAInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The size of the image to generate. Default value: `landscape_4_3`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * List of LoRA weights to apply (maximum 3).
   * NOTE(review): the element type was missing from this declaration; `unknown`
   * is a placeholder — narrow it once the intended item type is confirmed.
   */
  loras?: Array<unknown>;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `4`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The seed to use for the generation. If not provided, a random seed will be used.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI. Output is not stored when this is True.
*/ sync_mode?: boolean; };

/**
 * Input for image generation driven by a text prompt and a list of reference images.
 */
export type KlingImageO1Input = {
  /**
   * Aspect ratio of generated images. 'auto' intelligently determines based on input content. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "16:9" | "9:16" | "1:1" | "4:3" | "3:4" | "3:2" | "2:3" | "21:9";
  /**
   * Elements (characters/objects) to include in the image. Reference in prompt as @Element1, @Element2, etc.
   * Maximum 10 total (elements + reference images).
   *
   * NOTE(review): the element type is not visible in this part of the file, so the
   * list is typed as `unknown`; tighten once the element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * List of reference images. Reference images in prompt using @Image1, @Image2, etc. (1-indexed). Max 10 images.
   */
  image_urls: Array<string>;
  /**
   * Number of images to generate (1-9). Default value: `1`
   */
  num_images?: number;
  /**
   * File format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * Text prompt for image generation. Reference images using @Image1, @Image2, etc. (or @Image if only one image).
   * Max 2500 characters.
   */
  prompt: string;
  /**
   * Image generation resolution. 1K: standard, 2K: high-res. Default value: `"1K"`
   */
  resolution?: "1K" | "2K";
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

export type KlingImageO3ImageToImageInput = {
  /**
   * Aspect ratio of generated images. 'auto' intelligently determines based on input content. Default value: `"auto"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1" | "4:3" | "3:4" | "3:2" | "2:3" | "21:9" | "auto";
  /**
   * Optional: Elements (characters/objects) for face control. Reference in prompt as @Element1, @Element2, etc.
   *
   * NOTE(review): the element type is not visible in this part of the file, so the
   * list is typed as `unknown`; tighten once the element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * List of reference images. Reference images in prompt using @Image1, @Image2, etc. (1-indexed). Max 10 images.
   */
  image_urls: Array<string>;
  /**
   * Number of images to generate (1-9). Only used when result_type is 'single'. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image.
Default value: `"png"` */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * Text prompt for image generation. Reference images using @Image1, @Image2, etc. (or @Image if only one image).
   * Max 2500 characters.
   */
  prompt: string;
  /**
   * Image generation resolution. 1K: standard, 2K: high-res, 4K: ultra high-res. Default value: `"1K"`
   */
  resolution?: "1K" | "2K" | "4K";
  /**
   * Result type. 'single' for one image, 'series' for a series of related images. Default value: `"single"`
   */
  result_type?: "single" | "series";
  /**
   * Number of images in series (2-9). Only used when result_type is 'series'.
   */
  series_amount?: number;
  /**
   * If `True`, the media will be returned as a data URI.
   */
  sync_mode?: boolean;
};

/**
 * Input for text-prompted image generation, as a single image or a series.
 */
export type KlingImageO3TextToImageInput = {
  /**
   * Aspect ratio of generated images. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1" | "4:3" | "3:4" | "3:2" | "2:3" | "21:9";
  /**
   * Optional: Elements (characters/objects) for face control. Reference in prompt as @Element1, @Element2, etc.
   *
   * NOTE(review): the element type is not visible in this part of the file, so the
   * list is typed as `unknown`; tighten once the element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * Number of images to generate (1-9). Only used when result_type is 'single'. Default value: `1`
   */
  num_images?: number;
  /**
   * File format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * Text prompt for image generation. Max 2500 characters.
   */
  prompt: string;
  /**
   * Image generation resolution. 1K: standard, 2K: high-res, 4K: ultra high-res. Default value: `"1K"`
   */
  resolution?: "1K" | "2K" | "4K";
  /**
   * Result type. 'single' for one image, 'series' for a series of related images. Default value: `"single"`
   */
  result_type?: "single" | "series";
  /**
   * Number of images in series (2-9). Only used when result_type is 'series'.
   */
  series_amount?: number;
  /**
   * If `True`, the media will be returned as a data URI.
   */
  sync_mode?: boolean;
};

export type KlingImageV3ImageToImageInput = {
  /**
   * Aspect ratio of generated images.
Default value: `"16:9"` */
  aspect_ratio?: "16:9" | "9:16" | "1:1" | "4:3" | "3:4" | "3:2" | "2:3" | "21:9";
  /**
   * Optional: Elements (characters/objects) to include in the image for face control.
   *
   * NOTE(review): the element type is not visible in this part of the file, so the
   * list is typed as `unknown`; tighten once the element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * Reference image for image-to-image generation.
   *
   * Max file size: 10.0MB, Min width: 300px, Min height: 300px, Min aspect ratio: 0.40, Max aspect ratio: 2.50, Timeout: 20.0s
   */
  image_url: string | Blob | File;
  /**
   * Number of images to generate (1-9). Default value: `1`
   */
  num_images?: number;
  /**
   * File format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * Text prompt for image generation. Max 2500 characters.
   */
  prompt: string;
  /**
   * Image generation resolution. 1K: standard, 2K: high-res. Default value: `"1K"`
   */
  resolution?: "1K" | "2K";
  /**
   * If `True`, the media will be returned as a data URI.
   */
  sync_mode?: boolean;
};

export type KlingImageV3TextToImageInput = {
  /**
   * Aspect ratio of generated images. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1" | "4:3" | "3:4" | "3:2" | "2:3" | "21:9";
  /**
   * Optional: Elements (characters/objects) to include in the image for face control.
   * Each element can have a frontal image and optionally reference images.
   *
   * NOTE(review): the element type is not visible in this part of the file, so the
   * list is typed as `unknown`; tighten once the element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * Negative text prompt. It is recommended to supplement negative prompt information through negative sentences
   * directly within positive prompts.
   */
  negative_prompt?: string;
  /**
   * Number of images to generate (1-9). Default value: `1`
   */
  num_images?: number;
  /**
   * File format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * Text prompt for image generation. Max 2500 characters.
   */
  prompt: string;
  /**
   * Image generation resolution. 1K: standard, 2K: high-res. Default value: `"1K"`
   */
  resolution?: "1K" | "2K";
  /**
   * If `True`, the media will be returned as a data URI.
*/ sync_mode?: boolean; };

/**
 * Input for a virtual try-on: one garment image and one human image.
 */
export type KlingV15KolorsVirtualTryOnInput = {
  /**
   * URL of the garment image.
   */
  garment_image_url: string | Blob | File;
  /**
   * URL of the human image.
   */
  human_image_url: string | Blob | File;
  /**
   * If true, the function will return the image in the response.
   */
  sync_mode?: boolean;
};

/**
 * One element described either by images (frontal plus reference views) or by a video,
 * optionally with a bound voice.
 */
export type KlingV3ComboElementInput = {
  /**
   * The frontal image of the element (main view).
   */
  frontal_image_url?: string | Blob | File;
  /**
   * Additional reference images from different angles. 1-3 images supported. At least one image is required.
   */
  reference_image_urls?: Array<string>;
  /**
   * The video URL of the element. A request can only have one element with a video.
   */
  video_url?: string | Blob | File;
  /**
   * The voice ID for this element. The voice will be bound to the element and references to this element
   * will use the bound voice. Voice binding is only supported for video elements, and cannot be used with
   * image elements. Get voice IDs from the following endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
   */
  voice_id?: string;
};

/**
 * One shot of a multi-shot generation: its prompt and its duration.
 */
export type KlingV3MultiPromptElement = {
  /**
   * The duration of this shot in seconds. Default value: `"5"`
   */
  duration?:
    | "1"
    | "2"
    | "3"
    | "4"
    | "5"
    | "6"
    | "7"
    | "8"
    | "9"
    | "10"
    | "11"
    | "12"
    | "13"
    | "14"
    | "15";
  /**
   * The prompt for this shot.
   */
  prompt: string;
};

export type KlingVideoLipsyncTextToVideoInput = {
  /**
   * Text content for lip-sync video generation. Max 120 characters.
   */
  text: string;
  /**
   * The URL of the video to generate the lip sync for. Supports .mp4/.mov, ≤100MB, 2-60s, 720p/1080p only, width/height 720–1920px. If validation fails, an error is returned.
*/
  video_url: string | Blob | File;
  /**
   * Voice ID to use for speech synthesis
   */
  voice_id:
    | "genshin_vindi2" | "zhinen_xuesheng" | "AOT" | "ai_shatang" | "genshin_klee2" | "genshin_kirara"
    | "ai_kaiya" | "oversea_male1" | "ai_chenjiahao_712" | "girlfriend_4_speech02" | "chat1_female_new-3"
    | "chat_0407_5-1" | "cartoon-boy-07" | "uk_boy1" | "cartoon-girl-01" | "PeppaPig_platform"
    | "ai_huangzhong_712" | "ai_huangyaoshi_712" | "ai_laoguowang_712" | "chengshu_jiejie" | "you_pingjing"
    | "calm_story1" | "uk_man2" | "laopopo_speech02" | "heainainai_speech02" | "reader_en_m-v1"
    | "commercial_lady_en_f-v1" | "tiyuxi_xuedi" | "tiexin_nanyou" | "girlfriend_1_speech02"
    | "girlfriend_2_speech02" | "zhuxi_speech02" | "uk_oldman3" | "dongbeilaotie_speech02"
    | "chongqingxiaohuo_speech02" | "chuanmeizi_speech02" | "chaoshandashu_speech02"
    | "ai_taiwan_man2_speech02" | "xianzhanggui_speech02" | "tianjinjiejie_speech02"
    | "diyinnansang_DB_CN_M_04-v2" | "yizhipiannan-v1" | "guanxiaofang-v2" | "tianmeixuemei-v1"
    | "daopianyansang-v1" | "mengwa-v1";
  /**
   * The voice language corresponding to the Voice ID. Default value: `"en"`
   */
  voice_language?: "zh" | "en";
  /**
   * Speech rate for Text to Video generation. Default value: `1`
   */
  voice_speed?: number;
};

/**
 * Input for generating a video that applies one named effect scene to input images.
 */
export type KlingVideoV15ProEffectsInput = {
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * The effect scene to use for the video generation
   */
  effect_scene:
    | "hug" | "kiss" | "heart_gesture" | "squish" | "expansion" | "fuzzyfuzzy" | "bloombloom" | "dizzydizzy"
    | "jelly_press" | "jelly_slice" | "jelly_squish" | "jelly_jiggle" | "pixelpixel" | "yearbook"
    | "instant_film" | "anime_figure" | "rocketrocket" | "fly_fly" | "disappear" | "lightning_power"
    | "bullet_time" | "bullet_time_360" | "media_interview" | "day_to_night" | "let's_ride" | "jumpdrop"
    | "swish_swish" | "running_man" | "jazz_jazz" | "swing_swing" | "skateskate" | "building_sweater"
    | "pure_white_wings" | "black_wings"
    | "golden_wing" | "pink_pink_wings" | "rampage_ape" | "a_list_look" | "countdown_teleport"
    | "firework_2026" | "instant_christmas" | "birthday_star" | "firework" | "celebration"
    | "tiger_hug_pro" | "pet_lion_pro" | "guardian_spirit" | "squeeze_scream" | "inner_voice"
    | "memory_alive" | "guess_what" | "eagle_snatch" | "hug_from_past" | "instant_kid" | "dollar_rain"
    | "cry_cry" | "building_collapse" | "mushroom" | "jesus_hug" | "shark_alert" | "lie_flat"
    | "polar_bear_hug" | "brown_bear_hug" | "office_escape_plow" | "watermelon_bomb" | "boss_coming"
    | "wig_out" | "car_explosion" | "tiger_hug" | "siblings" | "construction_worker" | "snatched"
    | "felt_felt" | "plushcut" | "drunk_dance" | "drunk_dance_pet" | "daoma_dance" | "bouncy_dance"
    | "smooth_sailing_dance" | "new_year_greeting" | "lion_dance" | "prosperity" | "great_success"
    | "golden_horse_fortune" | "red_packet_box" | "lucky_horse_year" | "lucky_red_packet"
    | "lucky_money_come" | "lion_dance_pet" | "dumpling_making_pet" | "fish_making_pet" | "pet_red_packet"
    | "lantern_glow" | "expression_challenge" | "overdrive" | "heart_gesture_dance" | "poping"
    | "martial_arts" | "running" | "nezha" | "motorcycle_dance" | "subject_3_dance" | "ghost_step_dance"
    | "phantom_jewel" | "zoom_out" | "cheers_2026" | "kiss_pro" | "fight_pro" | "hug_pro"
    | "heart_gesture_pro" | "dollar_rain_pro" | "pet_bee_pro" | "santa_random_surprise"
    | "magic_match_tree" | "happy_birthday" | "thumbs_up_pro" | "surprise_bouquet" | "bouquet_drop"
    | "3d_cartoon_1_pro" | "glamour_photo_shoot" | "box_of_joy" | "first_toast_of_the_year"
    | "my_santa_pic" | "santa_gift" | "steampunk_christmas" | "snowglobe" | "christmas_photo_shoot"
    | "ornament_crash" | "santa_express" | "particle_santa_surround" | "coronation_of_frost"
    | "spark_in_the_snow" | "scarlet_and_snow" | "cozy_toon_wrap" | "bullet_time_lite" | "magic_cloak"
    | "balloon_parade" | "jumping_ginger_joy" | "c4d_cartoon_pro" | "venomous_spider" | "throne_of_king"
    | "luminous_elf" | "woodland_elf"
    | "japanese_anime_1" | "american_comics" | "snowboarding" | "witch_transform" | "vampire_transform"
    | "pumpkin_head_transform" | "demon_transform" | "mummy_transform" | "zombie_transform"
    | "cute_pumpkin_transform" | "cute_ghost_transform" | "knock_knock_halloween" | "halloween_escape"
    | "baseball" | "trampoline" | "trampoline_night" | "pucker_up" | "feed_mooncake" | "flyer"
    | "dishwasher" | "pet_chinese_opera" | "magic_fireball" | "gallery_ring" | "pet_moto_rider"
    | "muscle_pet" | "pet_delivery" | "mythic_style" | "steampunk" | "3d_cartoon_2" | "pet_chef"
    | "santa_gifts" | "santa_hug" | "girlfriend" | "boyfriend" | "heart_gesture_1" | "pet_wizard"
    | "smoke_smoke" | "gun_shot" | "double_gun" | "pet_warrior" | "long_hair" | "pet_dance"
    | "wool_curly" | "pet_bee" | "marry_me" | "piggy_morph" | "ski_ski" | "magic_broom"
    | "splashsplash" | "surfsurf" | "fairy_wing" | "angel_wing" | "dark_wing" | "emoji";
  /**
   * URL of images to be used for hug, kiss or heart_gesture video.
   */
  input_image_urls?: Array<string>;
};

export type KlingVideoV15ProImageToVideoInput = {
  /**
   * The aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt.
Default value: `0.5` */
  cfg_scale?: number;
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * List of dynamic masks.
   *
   * NOTE(review): the mask element type is not visible in this part of the file, so the
   * list is typed as `unknown`; tighten once the dynamic mask type is confirmed.
   */
  dynamic_masks?: Array<unknown>;
  /**
   * Input image (no description is given in the source).
   */
  image_url: string | Blob | File;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
  /**
   * URL of the image for Static Brush Application Area (Mask image created by users using the motion brush)
   */
  static_mask_url?: string | Blob | File;
  /**
   * URL of the image to be used for the end of the video
   */
  tail_image_url?: string | Blob | File;
};

export type KlingVideoV15ProTextToVideoInput = {
  /**
   * The aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt. Default value: `0.5`
   */
  cfg_scale?: number;
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
};

/**
 * Input for generating a video from a prompt and a list of input images.
 */
export type KlingVideoV16ProElementsInput = {
  /**
   * The aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * List of image URLs to use for video generation. Supports up to 4 images.
   */
  input_image_urls: Array<string>;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
};

export type KlingVideoV16ProImageToVideoInput = {
  /**
   * The aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt.
Default value: `0.5` */
  cfg_scale?: number;
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * Input image (no description is given in the source).
   */
  image_url: string | Blob | File;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
  /**
   * URL of the image to be used for the end of the video
   */
  tail_image_url?: string | Blob | File;
};

/**
 * Input for generating a video from an image and a prompt, with optional
 * static/dynamic masks and an optional end image.
 */
export type KlingVideoV1StandardImageToVideoInput = {
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt. Default value: `0.5`
   */
  cfg_scale?: number;
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * List of dynamic masks.
   *
   * NOTE(review): the mask element type is not visible in this part of the file, so the
   * list is typed as `unknown`; tighten once the dynamic mask type is confirmed.
   */
  dynamic_masks?: Array<unknown>;
  /**
   * URL of the image to be used for the video
   */
  image_url: string | Blob | File;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * The prompt for the video
   */
  prompt: string;
  /**
   * URL of the image for Static Brush Application Area (Mask image created by users using the motion brush)
   */
  static_mask_url?: string | Blob | File;
  /**
   * URL of the image to be used for the end of the video
   */
  tail_image_url?: string | Blob | File;
};

export type KlingVideoV1StandardTextToVideoInput = {
  /**
   * Advanced Camera control parameters
   */
  advanced_camera_control?: CameraControl;
  /**
   * The aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * Camera control parameters
   */
  camera_control?: "down_back" | "forward_up" | "right_turn_forward" | "left_turn_forward";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt.
Default value: `0.5` */
  cfg_scale?: number;
  /**
   * Length of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
};

/**
 * Input for converting text to speech with a selectable voice.
 */
export type KlingVideoV1TtsInput = {
  /**
   * Text that is converted to speech.
   */
  text: string;
  /**
   * Voice ID used for speech synthesis. Default value: `"genshin_vindi2"`
   */
  voice_id?:
    | "genshin_vindi2" | "zhinen_xuesheng" | "AOT" | "ai_shatang" | "genshin_klee2" | "genshin_kirara"
    | "ai_kaiya" | "oversea_male1" | "ai_chenjiahao_712" | "girlfriend_4_speech02" | "chat1_female_new-3"
    | "chat_0407_5-1" | "cartoon-boy-07" | "uk_boy1" | "cartoon-girl-01" | "PeppaPig_platform"
    | "ai_huangzhong_712" | "ai_huangyaoshi_712" | "ai_laoguowang_712" | "chengshu_jiejie" | "you_pingjing"
    | "calm_story1" | "uk_man2" | "laopopo_speech02" | "heainainai_speech02" | "reader_en_m-v1"
    | "commercial_lady_en_f-v1" | "tiyuxi_xuedi" | "tiexin_nanyou" | "girlfriend_1_speech02"
    | "girlfriend_2_speech02" | "zhuxi_speech02" | "uk_oldman3" | "dongbeilaotie_speech02"
    | "chongqingxiaohuo_speech02" | "chuanmeizi_speech02" | "chaoshandashu_speech02"
    | "ai_taiwan_man2_speech02" | "xianzhanggui_speech02" | "tianjinjiejie_speech02"
    | "diyinnansang_DB_CN_M_04-v2" | "yizhipiannan-v1" | "guanxiaofang-v2" | "tianmeixuemei-v1"
    | "daopianyansang-v1" | "mengwa-v1";
  /**
   * Rate of speech. Default value: `1`
   */
  voice_speed?: number;
};

export type KlingVideoV25TurboProImageToVideoInput = {
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt.
Default value: `0.5` */
  cfg_scale?: number;
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * URL of the image to be used for the video
   */
  image_url: string | Blob | File;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
  /**
   * URL of the image to be used for the end of the video
   */
  tail_image_url?: string | Blob | File;
};

/**
 * Input for generating a video from a start image (and optional end image),
 * with optional native audio and voices.
 */
export type KlingVideoV26ProImageToVideoInput = {
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * URL of the image to be used for the end of the video
   */
  end_image_url?: string | Blob | File;
  /**
   * Whether to generate native audio for the video. Supports Chinese and English voice output.
   * Other languages are automatically translated to English. For English speech, use lowercase letters;
   * for acronyms or proper nouns, use uppercase. Default value: `true`
   */
  generate_audio?: boolean;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
  /**
   * URL of the image to be used for the video
   */
  start_image_url: string | Blob | File;
  /**
   * Optional Voice IDs for video generation. Voices are referenced from the prompt with placeholder
   * markers (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint:
   * https://fal.ai/models/fal-ai/kling-video/create-voice
   *
   * NOTE(review): the placeholder syntax appears garbled in this file as `<<>>`; it is presumably of
   * the form `<<<voice_1>>>` / `<<<voice_2>>>` — confirm against the endpoint documentation.
   */
  voice_ids?: Array<string>;
};

export type KlingVideoV26ProMotionControlInput = {
  /**
   * Controls whether the output character's orientation matches the reference image or video.
   * 'video': orientation matches reference video - better for complex motions (max 30s).
   * 'image': orientation matches reference image - better for following camera movements (max 10s).
   */
  character_orientation: "image" | "video";
  /**
   * Reference image URL. The characters, backgrounds, and other elements in the generated video are based on this reference image.
Characters should have clear body proportions, avoid occlusion, and occupy more than 5% of the image area. */
  image_url: string | Blob | File;
  /**
   * Keep the original sound from the reference video or not. Default value: `true`
   */
  keep_original_sound?: boolean;
  /**
   * Optional prompt text (no description is given in the source).
   */
  prompt?: string;
  /**
   * Reference video URL. Character actions in the generated video will be consistent with this
   * reference video. It should contain a realistic style character with entire body or upper body
   * visible, including head, without obstruction. Duration limit depends on character_orientation:
   * 10s max for 'image', 30s max for 'video'.
   */
  video_url: string | Blob | File;
};

/**
 * Input for generating a video from a text prompt, optionally with native audio.
 */
export type KlingVideoV26ProTextToVideoInput = {
  /**
   * Aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * The CFG (Classifier Free Guidance) scale measures how closely the model
   * should stick to your prompt. Default value: `0.5`
   */
  cfg_scale?: number;
  /**
   * Length of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * Whether to generate native audio for the video. Supports Chinese and English voice output.
   * Other languages are automatically translated to English. For English speech, use lowercase letters;
   * for acronyms or proper nouns, use uppercase. Default value: `true`
   */
  generate_audio?: boolean;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
};

export type KlingVideoV2MasterImageToVideoInput = {
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt.
Default value: `0.5` */
  cfg_scale?: number;
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * URL of the image to be used for the video
   */
  image_url: string | Blob | File;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
};

export type KlingVideoV3ProImageToVideoInput = {
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt. Default value: `0.5`
   */
  cfg_scale?: number;
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?:
    | "3"
    | "4"
    | "5"
    | "6"
    | "7"
    | "8"
    | "9"
    | "10"
    | "11"
    | "12"
    | "13"
    | "14"
    | "15";
  /**
   * Elements (characters/objects) to include in the video. Each example can either be an image set
   * (frontal + reference images) or a video. Reference in prompt as @Element1, @Element2, etc.
   *
   * NOTE(review): element type chosen because `KlingV3ComboElementInput` (declared in this file)
   * has exactly the frontal/reference image and video fields described here — confirm against the schema.
   */
  elements?: Array<KlingV3ComboElementInput>;
  /**
   * URL of the image to be used for the end of the video
   */
  end_image_url?: string | Blob | File;
  /**
   * Whether to generate native audio for the video. Supports Chinese and English voice output.
   * Other languages are automatically translated to English. For English speech, use lowercase letters;
   * for acronyms or proper nouns, use uppercase. Default value: `true`
   */
  generate_audio?: boolean;
  /**
   * List of prompts for multi-shot video generation. If provided, divides the video into multiple shots.
   *
   * NOTE(review): element type chosen because `KlingV3MultiPromptElement` (declared in this file)
   * carries one shot's prompt and duration — confirm against the schema.
   */
  multi_prompt?: Array<KlingV3MultiPromptElement>;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Text prompt for video generation. Either prompt or multi_prompt must be provided, but not both.
   */
  prompt?: string;
  /**
   * The type of multi-shot video generation. Required when multi_prompt is provided.
Default value: `"customize"` */
  shot_type?: string;
  /**
   * URL of the image to be used for the video
   */
  start_image_url: string | Blob | File;
};

/**
 * Input for transferring the motion of a reference video onto the character of a reference image.
 */
export type KlingVideoV3ProMotionControlInput = {
  /**
   * Controls whether the output character's orientation matches the reference image or video.
   * 'video': orientation matches reference video - better for complex motions (max 30s).
   * 'image': orientation matches reference image - better for following camera movements (max 10s).
   */
  character_orientation: "image" | "video";
  /**
   * Optional element for facial consistency binding. Upload a facial element to enhance identity
   * preservation in the generated video. Only 1 element is supported. Reference in prompt as @Element1.
   * Element binding is only supported when character_orientation is 'video'.
   *
   * NOTE(review): the element type is not visible in this part of the file, so the
   * list is typed as `unknown`; tighten once the element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * Reference image URL. The characters, backgrounds, and other elements in the generated video are
   * based on this reference image. Characters should have clear body proportions, avoid occlusion,
   * and occupy more than 5% of the image area.
   */
  image_url: string | Blob | File;
  /**
   * Whether to keep the original sound from the reference video. Default value: `true`
   */
  keep_original_sound?: boolean;
  /**
   * Optional prompt text (no description is given in the source).
   */
  prompt?: string;
  /**
   * Reference video URL. The character actions in the generated video will be consistent with this
   * reference video. Should contain a realistic style character with entire body or upper body visible,
   * including head, without obstruction. Duration limit depends on character_orientation:
   * 10s max for 'image', 30s max for 'video'.
   */
  video_url: string | Blob | File;
};

export type KlingVideoV3ProTextToVideoInput = {
  /**
   * The aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt.
Default value: `0.5` */
  cfg_scale?: number;
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?:
    | "3"
    | "4"
    | "5"
    | "6"
    | "7"
    | "8"
    | "9"
    | "10"
    | "11"
    | "12"
    | "13"
    | "14"
    | "15";
  /**
   * Whether to generate native audio for the video. Supports Chinese and English voice output.
   * Other languages are automatically translated to English. For English speech, use lowercase letters;
   * for acronyms or proper nouns, use uppercase. Default value: `true`
   */
  generate_audio?: boolean;
  /**
   * List of prompts for multi-shot video generation. If provided, overrides the single prompt and
   * divides the video into multiple shots with specified prompts and durations.
   *
   * NOTE(review): element type chosen because `KlingV3MultiPromptElement` (declared in this file)
   * carries one shot's prompt and duration — confirm against the schema.
   */
  multi_prompt?: Array<KlingV3MultiPromptElement>;
  /**
   * Default value: `"blur, distort, and low quality"`
   */
  negative_prompt?: string;
  /**
   * Text prompt for video generation. Either prompt or multi_prompt must be provided, but not both.
   */
  prompt?: string;
  /**
   * The type of multi-shot video generation. Default value: `"customize"`
   */
  shot_type?: "customize" | "intelligent";
};

export type KokoroAmericanEnglishInput = {
  /**
   * Default value: `""`
   */
  prompt?: string;
  /**
   * Speed of the generated audio. Default is 1.0. Default value: `1`
   */
  speed?: number;
  /**
   * Voice ID for the desired voice. Default value: `"af_heart"`
   */
  voice?:
    | "af_heart"
    | "af_alloy"
    | "af_aoede"
    | "af_bella"
    | "af_jessica"
    | "af_kore"
    | "af_nicole"
    | "af_nova"
    | "af_river"
    | "af_sarah"
    | "af_sky"
    | "am_adam"
    | "am_echo"
    | "am_eric"
    | "am_fenrir"
    | "am_liam"
    | "am_michael"
    | "am_onyx"
    | "am_puck"
    | "am_santa";
};

export type KokoroFrenchInput = {
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
  /**
   * Speed of the generated audio. Default is 1.0. Default value: `1`
   */
  speed?: number;
  /**
   * Voice ID for the desired voice.
   */
  voice: string;
};

export type KokoroHindiInput = {
  /**
   * Prompt text (no description is given in the source).
   */
  prompt: string;
  /**
   * Speed of the generated audio. Default is 1.0. Default value: `1`
   */
  speed?: number;
  /**
   * Voice ID for the desired voice.
*/ voice: "hf_alpha" | "hf_beta" | "hm_omega" | "hm_psi"; };

/**
 * Input for generating images from a prompt and a source image.
 */
export type KolorsImg2ImgInput = {
  /**
   * Turns the safety checker on. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale measures how closely the model should
   * stick to your prompt when looking for a related image to show you. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * Size of the generated image.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * URL of the image used for image-to-image.
   */
  image_url: string | Blob | File;
  /**
   * Negative prompt. Use it to address details that you don't want in the image, such as
   * colors, objects, scenery and even small details (e.g. moustache, blurry, low resolution).
   * Default value: `""`
   */
  negative_prompt?: string;
  /**
   * How many images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * How many inference steps to perform. Default value: `50`
   */
  num_inference_steps?: number;
  /**
   * File format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * Scheduler used by the model. Default value: `"EulerDiscreteScheduler"`
   */
  scheduler?:
    | "EulerDiscreteScheduler"
    | "EulerAncestralDiscreteScheduler"
    | "DPMSolverMultistepScheduler"
    | "DPMSolverMultistepScheduler_SDE_karras"
    | "UniPCMultistepScheduler"
    | "DEISMultistepScheduler";
  /**
   * Seed
   */
  seed?: number;
  /**
   * Strength used for image-to-image. 1.0 completely remakes the image while 0.0 preserves
   * the original. Default value: `0.85`
   */
  strength?: number;
  /**
   * If set to true, the function will wait for the image to be generated and
   * uploaded before returning the response. This will increase the latency of
   * the function but it allows you to get the image directly in the response
   * without going through the CDN.
*/ sync_mode?: boolean; };

/**
 * Input for generating images from a text prompt.
 */
export type kolorsInput = {
  /**
   * Turns the safety checker on. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale measures how closely the model should
   * stick to your prompt when looking for a related image to show you. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * Size of the generated image. Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * Negative prompt. Use it to address details that you don't want in the image, such as
   * colors, objects, scenery and even small details (e.g. moustache, blurry, low resolution).
   * Default value: `""`
   */
  negative_prompt?: string;
  /**
   * How many images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * How many inference steps to perform. Default value: `50`
   */
  num_inference_steps?: number;
  /**
   * File format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The prompt used to generate the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * Scheduler used by the model. Default value: `"EulerDiscreteScheduler"`
   */
  scheduler?:
    | "EulerDiscreteScheduler"
    | "EulerAncestralDiscreteScheduler"
    | "DPMSolverMultistepScheduler"
    | "DPMSolverMultistepScheduler_SDE_karras"
    | "UniPCMultistepScheduler"
    | "DEISMultistepScheduler";
  /**
   * Seed
   */
  seed?: number;
  /**
   * If set to true, the function will wait for the image to be generated and
   * uploaded before returning the response. This will increase the latency of
   * the function but it allows you to get the image directly in the response
   * without going through the CDN.
   */
  sync_mode?: boolean;
};

export type KreaWan14bTextToVideoInput = {
  /**
   * Whether to enable prompt expansion.
This will use a large language model to expand the prompt with additional details while maintaining the original meaning. */
  enable_prompt_expansion?: boolean;
  /**
   * Number of frames to generate. Must be a multiple of 12 plus 6, for example 6, 18, 30, 42, etc.
   * Default value: `78`
   */
  num_frames?: number;
  /**
   * Prompt for the video-to-video generation.
   */
  prompt: string;
  /**
   * Seed for the video-to-video generation.
   */
  seed?: number;
};

/**
 * Input for generating a lip sync from a video and an audio track.
 */
export type latentsyncInput = {
  /**
   * URL of the audio to generate the lip sync for.
   */
  audio_url: string | Blob | File;
  /**
   * Guidance scale for the model inference. Default value: `1`
   */
  guidance_scale?: number;
  /**
   * Video loop mode when audio is longer than video. Options: pingpong, loop
   */
  loop_mode?: "pingpong" | "loop";
  /**
   * Random seed for generation. If None, a random seed will be used.
   */
  seed?: number;
  /**
   * URL of the video to generate the lip sync for.
   */
  video_url: string | Blob | File;
};

/**
 * Input for enhancing an audio file.
 */
export type LavaSrInput = {
  /**
   * Format of the output audio. Default value: `"mp3"`
   */
  audio_format?: "mp3" | "aac" | "m4a" | "ogg" | "opus" | "flac" | "wav";
  /**
   * URL of the audio file to enhance.
   */
  audio_url: string | Blob | File;
  /**
   * Bitrate of the output audio. Default value: `"192k"`
   */
  bitrate?: string;
  /**
   * If `True`, applies UL-UNAS noise filtering before bandwidth extension.
   */
  denoise?: boolean;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Result of an audio enhancement: the enhanced file plus per-step timings.
 */
export type LavaSrOutput = {
  /**
   * The enhanced audio file.
   */
  audio: AudioFile;
  /**
   * Timings for each step in the pipeline.
   */
  timings: LavaSRTimings;
};

export type LavaSRTimings = {
  /**
   * Time taken to run the inference in seconds.
   */
  inference: number;
  /**
   * Time taken to postprocess the audio in seconds.
   */
  postprocess: number;
  /**
   * Time taken to preprocess the audio in seconds.
*/ preprocess: number; }; export type LayerDiffusionInput = { /** * If set to false, the safety checker will be disabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The guidance scale for the model. Default value: `8` */ guidance_scale?: number; /** * The prompt to use for generating the negative image. Be as descriptive as possible for best results. Default value: `"text, watermark"` */ negative_prompt?: string; /** * The number of inference steps for the model. Default value: `20` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` */ prompt?: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; }; export type lcmInput = { /** * If set to true, the inpainting pipeline will use controlnet inpainting. * Only effective for inpainting pipelines. */ controlnet_inpaint?: boolean; /** * If set to true, the resulting image will be checked whether it includes any * potentially unsafe content. If it does, it will be replaced with a black * image. Default value: `true` */ enable_safety_checks?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `1` */ guidance_scale?: number; /** * The size of the generated image. You can choose between some presets or * custom height and width that **must be multiples of 8**. * * If not provided: * - For text-to-image generations, the default size is 512x512. * - For image-to-image generations, the default size is the same as the input image. * - For inpainting generations, the default size is the same as the input image. 
*/ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The base image to use for guiding the image generation on image-to-image * generations. If either the width or the height of the image is larger than 1024 * pixels, the image will be resized to 1024 pixels while keeping the aspect ratio. */ image_url?: string | Blob | File; /** * If set to true, the inpainting pipeline will only inpaint the provided mask * area. Only effective for inpainting pipelines. */ inpaint_mask_only?: boolean; /** * The scale of the lora server to use for image generation. Default value: `1` */ lora_scale?: number; /** * The URL of the lora server to use for image generation. */ lora_url?: string | Blob | File; /** * The mask to use for guiding the image generation on image * inpainting. The model will focus on the mask area and try to fill it with * the most relevant content. * * The mask must be a black and white image where the white area is the area * that needs to be filled and the black area is the area that should be * ignored. * * The mask must have the same dimensions as the image passed as `image_url`. */ mask_url?: string | Blob | File; /** * The model to use for generating the image. Default value: `"sdv1-5"` */ model?: "sdxl" | "sdv1-5"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. The function will return a list of images * with the same prompt and negative prompt but different seeds. Default value: `1` */ num_images?: number; /** * The number of inference steps to use for generating the image. The more steps * the better the image will be but it will also take longer to generate.
Default value: `4` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The strength of the image that is passed as `image_url`. The strength * determines how much the generated image will be similar to the image passed as * `image_url`. The higher the strength the more model gets "creative" and * generates an image that's different from the initial image. A strength of 1.0 * means that the initial image is more or less ignored and the model will try to * generate an image that's as close as possible to the prompt. Default value: `0.8` */ strength?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; export type lcmOutput = { /** * The generated image files info. * * NOTE(review): the element type is not visible in this part of the file, so entries * are typed as `unknown`; replace with the concrete image-file type once confirmed. */ images: Array<unknown>; /** * A list of booleans indicating whether the generated image contains any * potentially unsafe content. If the safety check is disabled, every entry in * this field will be false. */ nsfw_content_detected: Array<boolean>; /** * Number of inference steps used to generate the image. It will be the same value as the one passed in the * input or the default one in case none was passed. Default value: `4` */ num_inference_steps?: number; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * Seed of the generated Image.
It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** * NOTE(review): undocumented in the original; presumably timing information for the * request. The value is typed `unknown`, so narrow it before use. */ timings: unknown; }; export type LcmSd15I2iInput = { /** * If set to true, the resulting image will be checked whether it includes any * potentially unsafe content. If it does, it will be replaced with a black * image. Default value: `true` */ enable_safety_checks?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `1` */ guidance_scale?: number; /** * The image to use as a base. */ image_url: string | Blob | File; /** * Mask URL for compatibility with generic LCM processing. */ mask_url?: string | Blob | File; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. The function will return a list of images * with the same prompt and negative prompt but different seeds. Default value: `1` */ num_images?: number; /** * The number of inference steps to use for generating the image. The more steps * the better the image will be but it will also take longer to generate. Default value: `4` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The strength of the image.
Default value: `0.8` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type Lighting = { /** * The conditions of the lighting in the image to be generated. */ conditions?: string; /** * The direction of the lighting in the image to be generated. */ direction?: string; /** * The shadows in the image to be generated. */ shadows?: string; }; export type LightningModelsImageToImageInput = { /** * The list of embeddings to use. * * NOTE(review): the element type is not visible in this part of the file, so entries * are typed as `unknown`; replace with the concrete embedding type once confirmed. */ embeddings?: Array<unknown>; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** * The size of the generated image: either an `ImageSize` value or one of the * preset names listed in the type. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use. * * NOTE(review): the element type is not visible in this part of the file, so entries * are typed as `unknown`; replace with the concrete LoRA-weight type once confirmed. */ loras?: Array<unknown>; /** * The Lightning model to use. */ model_name?: string; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `5` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results.
*/ prompt: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * Scheduler / sampler to use for the image denoising process. */ scheduler?: "DPM++ 2M" | "DPM++ 2M Karras" | "DPM++ 2M SDE" | "DPM++ 2M SDE Karras" | "DPM++ SDE" | "DPM++ SDE Karras" | "KDPM 2A" | "Euler" | "Euler (trailing timesteps)" | "Euler A" | "LCM" | "EDMDPMSolverMultistepScheduler" | "TCDScheduler"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; export type LightningModelsInpaintingInput = { /** * The list of embeddings to use. * * NOTE(review): the element type is not visible in this part of the file, so entries * are typed as `unknown`; replace with the concrete embedding type once confirmed. */ embeddings?: Array<unknown>; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** * The size of the generated image: either an `ImageSize` value or one of the * preset names listed in the type. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use.
*/ loras?: Array<unknown>; /* NOTE(review): element type is not visible in this part of the file; typed as `unknown` until the concrete LoRA-weight type is confirmed. */ /** * The URL of the mask to use for inpainting. */ mask_url: string | Blob | File; /** * The Lightning model to use. */ model_name?: string; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `5` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * Scheduler / sampler to use for the image denoising process. */ scheduler?: "DPM++ 2M" | "DPM++ 2M Karras" | "DPM++ 2M SDE" | "DPM++ 2M SDE Karras" | "DPM++ SDE" | "DPM++ SDE Karras" | "KDPM 2A" | "Euler" | "Euler (trailing timesteps)" | "Euler A" | "LCM" | "EDMDPMSolverMultistepScheduler" | "TCDScheduler"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Determines how much the generated image resembles the initial image. Default value: `0.95` */ strength?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; export type LightningModelsInput = { /** * The list of embeddings to use. * * NOTE(review): the element type is not visible in this part of the file, so entries * are typed as `unknown`; replace with the concrete embedding type once confirmed. */ embeddings?: Array<unknown>; /** * If set to true, the safety checker will be enabled.
Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** * The size of the generated image: either an `ImageSize` value or one of the * preset names listed in the type. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The list of LoRA weights to use. * * NOTE(review): the element type is not visible in this part of the file, so entries * are typed as `unknown`; replace with the concrete LoRA-weight type once confirmed. */ loras?: Array<unknown>; /** * The Lightning model to use. */ model_name?: string; /** * The negative prompt to use. Use it to address details that you don't want in the image. Default value: `"(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)"` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `5` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker.
v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * Scheduler / sampler to use for the image denoising process. */ scheduler?: "DPM++ 2M" | "DPM++ 2M Karras" | "DPM++ 2M SDE" | "DPM++ 2M SDE Karras" | "DPM++ SDE" | "DPM++ SDE Karras" | "KDPM 2A" | "Euler" | "Euler (trailing timesteps)" | "Euler A" | "LCM" | "EDMDPMSolverMultistepScheduler" | "TCDScheduler"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; export type LightxRecameraInput = { /** * Camera control mode. Default value: `"traj"` */ camera?: "traj" | "target"; /** * Camera motion mode. Default value: `"gradual"` */ mode?: "gradual" | "bullet" | "direct" | "dolly-zoom"; /** * Optional text prompt. If omitted, Light-X will auto-caption the video. */ prompt?: string; /** * Random seed for reproducibility. If None, a random seed is chosen. */ seed?: number; /** * Target camera pose [theta, phi, radius, x, y] (required when camera='target'). * * NOTE(review): typed as a list of numbers on the assumption that every pose * component is numeric; confirm against the API schema. */ target_pose?: Array<number>; /** * Camera trajectory parameters (required for recamera mode). */ trajectory?: TrajectoryParameters; /** * URL of the input video. */ video_url: string | Blob | File; }; export type LightxRelightInput = { /** * Optional text prompt. If omitted, Light-X will auto-caption the video. */ prompt?: string; /** * Frame index to use as the reference when relighting the video with a reference. */ ref_id?: number; /** * Relighting parameters (required for relight_condition_type='ic'). Not used for 'bg' (which expects a background image URL instead). */ relight_parameters?: RelightParameters; /** * URL of conditioning image.
Required for relight_condition_type='ref'/'hdr'. Also required for relight_condition_type='bg' (background image). */ relit_cond_img_url?: string | Blob | File; /** * Relight condition type. Default value: `"ic"` */ relit_cond_type?: "ic" | "ref" | "hdr" | "bg"; /** * Random seed for reproducibility. If None, a random seed is chosen. */ seed?: number; /** * URL of the input video. */ video_url: string | Blob | File; }; export type LightxRelightOutput = { /** * Optional: normalized/processed input video (if produced by the pipeline). */ input_video?: File; /** * The seed used for generation. */ seed: number; /** * The generated video file. */ video: File; /** * Optional: visualization/debug video (if produced by the pipeline). */ viz_video?: File; }; export type LineartInput = { /** * Whether to use the coarse model */ coarse?: boolean; /** * URL of the image to process */ image_url: string | Blob | File; }; export type lipsyncInput = { /** * Input audio, given as a URL string or as a Blob/File. */ audio_url: string | Blob | File; /** * Input video, given as a URL string or as a Blob/File. */ video_url: string | Blob | File; }; export type LipSyncInput = { /** * URL of the input audio */ audio_url: string | Blob | File; /** * The model to use for lipsyncing. Default value: `"lipsync-1.9.0-beta"` */ model?: "lipsync-1.8.0" | "lipsync-1.7.1" | "lipsync-1.9.0-beta"; /** * Lipsync mode when audio and video durations are out of sync. Default value: `"cut_off"` */ sync_mode?: "cut_off" | "loop" | "bounce" | "silence" | "remap"; /** * URL of the input video */ video_url: string | Blob | File; }; export type ListAvatarsOutput = { /** * List of available avatar names */ avatars: Array<string>; }; /** * Input for listing voices. No specific fields are declared here. * * NOTE(review): typed as an open string-keyed record so any existing caller payload * still type-checks; tighten to `Record<string, never>` if the endpoint takes no input. */ export type ListVoicesInput = Record<string, unknown>; export type ListVoicesOutput = { /** * List of available voice names */ voices: Array<string>; }; export type LiveAvatarInput = { /** * Acceleration level for faster video decoding Default value: `"none"` */ acceleration?: "none" | "light" | "regular" | "high"; /** * The URL of the driving audio file (WAV or MP3). The avatar will be animated to match this audio.
*/ audio_url: string | Blob | File; /** * Enable safety checker for content moderation. Default value: `true` */ enable_safety_checker?: boolean; /** * Number of frames per clip. Must be a multiple of 4. Higher values = smoother but slower generation. Default value: `48` */ frames_per_clip?: number; /** * Classifier-free guidance scale. Higher values follow the prompt more closely. */ guidance_scale?: number; /** * The URL of the reference image for avatar generation. The character in this image will be animated. */ image_url: string | Blob | File; /** * Number of video clips to generate. Each clip is approximately 3 seconds. Set higher for longer videos. Default value: `10` */ num_clips?: number; /** * A text prompt describing the scene and character. Helps guide the video generation style and context. */ prompt: string; /** * Random seed for reproducible generation. */ seed?: number; }; export type LivePortraitImageInput = { /** * Amount to open mouth in 'aaa' shape */ aaa?: number; /** * Amount to blink the eyes */ blink?: number; /** * Size of the output image. Default value: `512` */ dsize?: number; /** * Amount to shape mouth in 'eee' position */ eee?: number; /** * Whether to enable the safety checker. If enabled, the model will check if the input image contains a face before processing it. * The safety checker will process the input image */ enable_safety_checker?: boolean; /** * Amount to raise or lower eyebrows */ eyebrow?: number; /** * Whether to crop the source portrait to the face-cropping space. Default value: `true` */ flag_do_crop?: boolean; /** * Whether to conduct the rotation when flag_do_crop is True. Default value: `true` */ flag_do_rot?: boolean; /** * Whether to set the lip to closed state before animation. Only takes effect when flag_eye_retargeting and flag_lip_retargeting are False. 
Default value: `true` */ flag_lip_zero?: boolean; /** * Whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true` */ flag_pasteback?: boolean; /** * URL of the image to be animated */ image_url: string | Blob | File; /** * Output format Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * Amount to move pupils horizontally */ pupil_x?: number; /** * Amount to move pupils vertically */ pupil_y?: number; /** * Amount to rotate the face in pitch */ rotate_pitch?: number; /** * Amount to rotate the face in roll */ rotate_roll?: number; /** * Amount to rotate the face in yaw */ rotate_yaw?: number; /** * Scaling factor for the face crop. Default value: `2.3` */ scale?: number; /** * Amount to smile */ smile?: number; /** * Horizontal offset ratio for face crop. */ vx_ratio?: number; /** * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125` */ vy_ratio?: number; /** * Amount to wink */ wink?: number; /** * Amount to shape mouth in 'woo' position */ woo?: number; }; export type LivePortraitInput = { /** * Amount to open mouth in 'aaa' shape */ aaa?: number; /** * Batch size for the model. The larger the batch size, the faster the model will run, but the more memory it will consume. Default value: `32` */ batch_size?: number; /** * Amount to blink the eyes */ blink?: number; /** * Size of the output image. Default value: `512` */ dsize?: number; /** * Amount to shape mouth in 'eee' position */ eee?: number; /** * Whether to enable the safety checker. If enabled, the model will check if the input image contains a face before processing it. * The safety checker will process the input image */ enable_safety_checker?: boolean; /** * Amount to raise or lower eyebrows */ eyebrow?: number; /** * Whether to crop the source portrait to the face-cropping space. 
Default value: `true` */ flag_do_crop?: boolean; /** * Whether to conduct the rotation when flag_do_crop is True. Default value: `true` */ flag_do_rot?: boolean; /** * Whether to enable eye retargeting. */ flag_eye_retargeting?: boolean; /** * Whether to enable lip retargeting. */ flag_lip_retargeting?: boolean; /** * Whether to set the lip to closed state before animation. Only takes effect when flag_eye_retargeting and flag_lip_retargeting are False. Default value: `true` */ flag_lip_zero?: boolean; /** * Whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true` */ flag_pasteback?: boolean; /** * Whether to use relative motion. Default value: `true` */ flag_relative?: boolean; /** * Whether to enable stitching. Recommended to set to True. Default value: `true` */ flag_stitching?: boolean; /** * URL of the image to be animated */ image_url: string | Blob | File; /** * Amount to move pupils horizontally */ pupil_x?: number; /** * Amount to move pupils vertically */ pupil_y?: number; /** * Amount to rotate the face in pitch */ rotate_pitch?: number; /** * Amount to rotate the face in roll */ rotate_roll?: number; /** * Amount to rotate the face in yaw */ rotate_yaw?: number; /** * Scaling factor for the face crop. Default value: `2.3` */ scale?: number; /** * Amount to smile */ smile?: number; /** * URL of the video to drive the lip syncing. */ video_url: string | Blob | File; /** * Horizontal offset ratio for face crop. */ vx_ratio?: number; /** * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125` */ vy_ratio?: number; /** * Amount to wink */ wink?: number; /** * Amount to shape mouth in 'woo' position */ woo?: number; }; export type LivePortraitVideoInput = { /** * Whether to prioritize source or driving audio. Default value: `"source"` */ audio_priority?: "source" | "driving"; /** * URL of the video to drive the lip syncing. 
*/ driving_video_url: string | Blob | File; /** * Whether to filter out NSFW content. Default value: `true` */ enable_safety_checker?: boolean; /** * URL of the video to drive the lip syncing. */ source_video_url: string | Blob | File; }; export type LlavaNextInput = { /** * URL of the image to be processed */ image_url: string | Blob | File; /** * Maximum number of tokens to generate Default value: `64` */ max_tokens?: number; /** * Prompt to be used for the image */ prompt: string; /** * Temperature for sampling Default value: `0.2` */ temperature?: number; /** * Top P for sampling Default value: `1` */ top_p?: number; }; export type LongcatImageEditInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The guidance scale to use for the image generation. Default value: `4.5` */ guidance_scale?: number; /** * The URL of the image to edit. */ image_url: string | Blob | File; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to edit the image with. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type LongcatMultiAvatarImageAudioToVideoInput = { /** * The audio guidance scale. Higher values may lead to exaggerated mouth movements. Default value: `4` */ audio_guidance_scale?: number; /** * How to combine the two audio tracks. 
'para' (parallel) plays both simultaneously, 'add' (sequential) plays person 1 first then person 2. Default value: `"para"` */ audio_type?: "para" | "add"; /** * The URL of the audio file for person 1 (left side). Default value: `"https://raw.githubusercontent.com/meituan-longcat/LongCat-Video/refs/heads/main/assets/avatar/multi/sing_man.WAV"` */ audio_url_person1?: string; /** * The URL of the audio file for person 2 (right side). Default value: `"https://raw.githubusercontent.com/meituan-longcat/LongCat-Video/refs/heads/main/assets/avatar/multi/sing_woman.WAV"` */ audio_url_person2?: string; /** * Bounding box for person 1. If not provided, defaults to left half of image. */ bbox_person1?: BoundingBox; /** * Bounding box for person 2. If not provided, defaults to right half of image. */ bbox_person2?: BoundingBox; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The URL of the image containing two speakers. */ image_url: string | Blob | File; /** * The negative prompt to avoid in the video generation. Default value: `"Close-up, Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"` */ negative_prompt?: string; /** * The number of inference steps to use. Default value: `30` */ num_inference_steps?: number; /** * Number of video segments to generate. Each segment adds ~5 seconds of video. First segment is ~5.8s, additional segments are 5s each. Default value: `1` */ num_segments?: number; /** * The prompt to guide the video generation. 
Default value: `"Two people are having a conversation with natural expressions and movements."` */ prompt?: string; /** * Resolution of the generated video (480p or 720p). Billing is per video-second (16 frames): 480p is 1 unit per second and 720p is 4 units per second. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * The seed for the random number generator. */ seed?: number; /** * The text guidance scale for classifier-free guidance. Default value: `4` */ text_guidance_scale?: number; }; export type LongcatSingleAvatarAudioToVideoInput = { /** * The audio guidance scale. Higher values may lead to exaggerated mouth movements. Default value: `4` */ audio_guidance_scale?: number; /** * The URL of the audio file to drive the avatar. */ audio_url: string | Blob | File; /** * Whether to enable prompt expansion using an LLM to enhance the prompt for better video quality. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The negative prompt to avoid in the video generation. Default value: `"Close-up, Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"` */ negative_prompt?: string; /** * The number of inference steps to use. Default value: `50` */ num_inference_steps?: number; /** * Number of video segments to generate. Each segment adds ~5 seconds of video. First segment is ~5.8s, additional segments are 5s each. Default value: `1` */ num_segments?: number; /** * The prompt to guide the video generation. 
Default value: `"A person is talking naturally with natural expressions and movements."` */ prompt?: string; /** * Resolution of the generated video (480p or 720p). Billing is per video-second (16 frames): 480p is 1 unit per second and 720p is 4 units per second. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * The seed for the random number generator. */ seed?: number; /** * The text guidance scale for classifier-free guidance. Default value: `4` */ text_guidance_scale?: number; }; export type LongcatSingleAvatarImageAudioToVideoInput = { /** * The audio guidance scale. Higher values may lead to exaggerated mouth movements. Default value: `4` */ audio_guidance_scale?: number; /** * The URL of the audio file to drive the avatar. */ audio_url: string | Blob | File; /** * Whether to enable prompt expansion using an LLM to enhance the prompt for better video quality. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The URL of the image to animate. */ image_url: string | Blob | File; /** * The negative prompt to avoid in the video generation. Default value: `"Close-up, Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"` */ negative_prompt?: string; /** * The number of inference steps to use. Default value: `50` */ num_inference_steps?: number; /** * Number of video segments to generate. Each segment adds ~5 seconds of video. First segment is ~5.8s, additional segments are 5s each. Default value: `1` */ num_segments?: number; /** * The prompt to guide the video generation. 
Default value: `"A person is talking naturally with natural expressions and movements."` */ prompt?: string; /** * Resolution of the generated video (480p or 720p). Billing is per video-second (16 frames): 480p is 1 unit per second and 720p is 4 units per second. Default value: `"480p"` */ resolution?: "480p" | "720p"; /** * The seed for the random number generator. */ seed?: number; /** * The text guidance scale for classifier-free guidance. Default value: `4` */ text_guidance_scale?: number; }; export type LongcatVideoDistilledImageToVideo480pInput = { /** * Whether to enable prompt expansion. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the generated video. Default value: `15` */ fps?: number; /** * The URL of the image to generate a video from. */ image_url: string | Blob | File; /** * The number of frames to generate. Default value: `162` */ num_frames?: number; /** * The number of inference steps to use. Default value: `12` */ num_inference_steps?: number; /** * The prompt to guide the video generation. Default value: `"First-person view from the cockpit of a Formula 1 car. The driver's gloved hands firmly grip the intricate, carbon-fiber steering wheel adorned with numerous colorful buttons and a vibrant digital display showing race data. Beyond the windshield, a sun-drenched racetrack stretches ahead, lined with cheering spectators in the grandstands. Several rival cars are visible in the distance, creating a dynamic sense of competition. The sky above is a clear, brilliant blue, reflecting the exhilarating atmosphere of a high-speed race. high resolution 4k"` */ prompt?: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The output type of the generated video. 
Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LongcatVideoDistilledImageToVideo720pInput = { /** * Whether to enable prompt expansion. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the generated video. Default value: `30` */ fps?: number; /** * The URL of the image to generate a video from. */ image_url: string | Blob | File; /** * The number of frames to generate. Default value: `162` */ num_frames?: number; /** * The number of inference steps to use. Default value: `12` */ num_inference_steps?: number; /** * The number of inference steps to use for refinement. Default value: `12` */ num_refine_inference_steps?: number; /** * The prompt to guide the video generation. Default value: `"First-person view from the cockpit of a Formula 1 car. The driver's gloved hands firmly grip the intricate, carbon-fiber steering wheel adorned with numerous colorful buttons and a vibrant digital display showing race data. Beyond the windshield, a sun-drenched racetrack stretches ahead, lined with cheering spectators in the grandstands. Several rival cars are visible in the distance, creating a dynamic sense of competition. The sky above is a clear, brilliant blue, reflecting the exhilarating atmosphere of a high-speed race. high resolution 4k"` */ prompt?: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The output type of the generated video. 
Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LongcatVideoDistilledTextToVideo480pInput = { /** * The aspect ratio of the generated video. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "1:1"; /** * Whether to enable prompt expansion. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the generated video. Default value: `15` */ fps?: number; /** * The number of frames to generate. Default value: `162` */ num_frames?: number; /** * The number of inference steps to use. Default value: `12` */ num_inference_steps?: number; /** * The prompt to guide the video generation. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LongcatVideoDistilledTextToVideo720pInput = { /** * The aspect ratio of the generated video. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "1:1"; /** * Whether to enable prompt expansion. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. 
Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the generated video. Default value: `30` */ fps?: number; /** * The number of frames to generate. Default value: `162` */ num_frames?: number; /** * The number of inference steps to use. Default value: `12` */ num_inference_steps?: number; /** * The number of inference steps to use for refinement. Default value: `12` */ num_refine_inference_steps?: number; /** * The prompt to guide the video generation. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LongcatVideoImageToVideo480pInput = { /** * The acceleration level to use for the video generation. Default value: `"regular"` */ acceleration?: "none" | "regular"; /** * Whether to enable prompt expansion. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the generated video. Default value: `15` */ fps?: number; /** * The guidance scale to use for the video generation. Default value: `4` */ guidance_scale?: number; /** * The URL of the image to generate a video from. */ image_url: string | Blob | File; /** * The negative prompt to use for the video generation. 
Default value: `"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `162` */ num_frames?: number; /** * The number of inference steps to use for the video generation. Default value: `40` */ num_inference_steps?: number; /** * The prompt to guide the video generation. Default value: `"First-person view from the cockpit of a Formula 1 car. The driver's gloved hands firmly grip the intricate, carbon-fiber steering wheel adorned with numerous colorful buttons and a vibrant digital display showing race data. Beyond the windshield, a sun-drenched racetrack stretches ahead, lined with cheering spectators in the grandstands. Several rival cars are visible in the distance, creating a dynamic sense of competition. The sky above is a clear, brilliant blue, reflecting the exhilarating atmosphere of a high-speed race. high resolution 4k"` */ prompt?: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the generated video. 
Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LongcatVideoImageToVideo720pInput = { /** * The acceleration level to use for the video generation. Default value: `"regular"` */ acceleration?: "none" | "regular"; /** * Whether to enable prompt expansion. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the generated video. Default value: `30` */ fps?: number; /** * The guidance scale to use for the video generation. Default value: `4` */ guidance_scale?: number; /** * The URL of the image to generate a video from. */ image_url: string | Blob | File; /** * The negative prompt to use for the video generation. Default value: `"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `162` */ num_frames?: number; /** * The number of inference steps to use for the video generation. Default value: `40` */ num_inference_steps?: number; /** * The number of inference steps to use for refinement. Default value: `40` */ num_refine_inference_steps?: number; /** * The prompt to guide the video generation. Default value: `"First-person view from the cockpit of a Formula 1 car. The driver's gloved hands firmly grip the intricate, carbon-fiber steering wheel adorned with numerous colorful buttons and a vibrant digital display showing race data. Beyond the windshield, a sun-drenched racetrack stretches ahead, lined with cheering spectators in the grandstands. 
Several rival cars are visible in the distance, creating a dynamic sense of competition. The sky above is a clear, brilliant blue, reflecting the exhilarating atmosphere of a high-speed race. high resolution 4k"` */ prompt?: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LongcatVideoTextToVideo480pInput = { /** * The acceleration level to use for the video generation. Default value: `"regular"` */ acceleration?: "none" | "regular"; /** * The aspect ratio of the generated video. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "1:1"; /** * Whether to enable prompt expansion. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the generated video. Default value: `15` */ fps?: number; /** * The guidance scale to use for the video generation. Default value: `4` */ guidance_scale?: number; /** * The negative prompt to use for the video generation. 
Default value: `"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `162` */ num_frames?: number; /** * The number of inference steps to use for the video generation. Default value: `40` */ num_inference_steps?: number; /** * The prompt to guide the video generation. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LongcatVideoTextToVideo720pInput = { /** * The acceleration level to use for the video generation. Default value: `"regular"` */ acceleration?: "none" | "regular"; /** * The aspect ratio of the generated video. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "1:1"; /** * Whether to enable prompt expansion. */ enable_prompt_expansion?: boolean; /** * Whether to enable safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the generated video. Default value: `30` */ fps?: number; /** * The guidance scale to use for the video generation. 
Default value: `4` */ guidance_scale?: number; /** * The negative prompt to use for the video generation. Default value: `"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `162` */ num_frames?: number; /** * The number of inference steps to use for the video generation. Default value: `40` */ num_inference_steps?: number; /** * The number of inference steps to use for refinement. Default value: `40` */ num_refine_inference_steps?: number; /** * The prompt to guide the video generation. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LoraImageToImageInput = { /** * Skips part of the image generation process, leading to slightly different results. * This means the image renders faster, too. */ clip_skip?: number; /** * If set to true, the controlnet will be applied to only the conditional predictions. */ controlnet_guess_mode?: boolean; /** * The control nets to use for the image generation. 
You can use any number of control nets * and they will be applied to the image at the specified timesteps. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ controlnets?: Array<unknown>; /** * If set to true, the latents will be saved for debugging. */ debug_latents?: boolean; /** * If set to true, the latents will be saved for debugging per pass. */ debug_per_pass_latents?: boolean; /** * The embeddings to use for the image generation. Only a single embedding is supported at the moment. * The embeddings will be used to map the tokens in the prompt to the embedding weights. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ embeddings?: Array<unknown>; /** * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** * The eta value to be used for the image generation. */ eta?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The URL of the IC Light model image to use for the image generation. */ ic_light_image_url?: string | Blob | File; /** * The URL of the IC Light model background image to use for the image generation. * Make sure to use a background compatible with the model. */ ic_light_model_background_image_url?: string | Blob | File; /** * The URL of the IC Light model to use for the image generation. */ ic_light_model_url?: string | Blob | File; /** * The path to the image encoder model to use for the image generation. */ image_encoder_path?: string; /** * The subfolder of the image encoder model to use for the image generation. */ image_encoder_subfolder?: string; /** * The weight name of the image encoder model to use for the image generation. Default value: `"pytorch_model.bin"` */ image_encoder_weight_name?: string; /** * The format of the generated image. Default value: `"png"` */ image_format?: "jpeg" | "png"; /** * URL of image to use for image to image/inpainting.
*/ image_url?: string | Blob | File; /** * The IP adapter to use for the image generation. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ ip_adapter?: Array<unknown>; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ loras?: Array<unknown>; /** * URL or HuggingFace ID of the base model to generate the image. */ model_name: string; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The amount of noise to add to noise image for image. Only used if the image_url is provided. 1.0 is complete noise and 0 is no noise. Default value: `0.5` */ noise_strength?: number; /** * Number of images to generate in one request. Note that the higher the batch size, * the longer it will take to generate the images. Default value: `1` */ num_images?: number; /** * Increasing the amount of steps tells Stable Diffusion that it should take more steps * to generate your final result which can increase the amount of detail in your image. Default value: `30` */ num_inference_steps?: number; /** * The type of prediction to use for the image generation. * The `epsilon` is the default. Default value: `"epsilon"` */ prediction_type?: "v_prediction" | "epsilon"; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * If set to true, the prompt weighting syntax will be used. * Additionally, this will lift the 77 token limit by averaging embeddings. */ prompt_weighting?: boolean; /** * Whether to set the rescale_betas_snr_zero option or not for the sampler */ rescale_betas_snr_zero?: boolean; /** * Scheduler / sampler to use for the image denoising process.
*/ scheduler?: "DPM++ 2M" | "DPM++ 2M Karras" | "DPM++ 2M SDE" | "DPM++ 2M SDE Karras" | "Euler" | "Euler A" | "Euler (trailing timesteps)" | "LCM" | "LCM (trailing timesteps)" | "DDIM" | "TCD"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method. * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter. * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set. */ sigmas?: SigmasInput; /** * The size of the tiles to be used for the image generation. Default value: `4096` */ tile_height?: number; /** * The stride of the tiles to be used for the image generation. Default value: `2048` */ tile_stride_height?: number; /** * The stride of the tiles to be used for the image generation. Default value: `2048` */ tile_stride_width?: number; /** * The size of the tiles to be used for the image generation. Default value: `4096` */ tile_width?: number; /** * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method. * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter. * If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `sigmas` is set. */ timesteps?: TimestepsInput; /** * URL or HuggingFace ID of the custom U-Net model to use for the image generation. */ unet_name?: string; /** * The variant of the model to use for huggingface models, e.g. 'fp16'. 
*/ variant?: string; }; export type LoraInpaintInput = { /** * Skips part of the image generation process, leading to slightly different results. * This means the image renders faster, too. */ clip_skip?: number; /** * If set to true, the controlnet will be applied to only the conditional predictions. */ controlnet_guess_mode?: boolean; /** * The control nets to use for the image generation. You can use any number of control nets * and they will be applied to the image at the specified timesteps. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ controlnets?: Array<unknown>; /** * If set to true, the latents will be saved for debugging. */ debug_latents?: boolean; /** * If set to true, the latents will be saved for debugging per pass. */ debug_per_pass_latents?: boolean; /** * The embeddings to use for the image generation. Only a single embedding is supported at the moment. * The embeddings will be used to map the tokens in the prompt to the embedding weights. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ embeddings?: Array<unknown>; /** * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** * The eta value to be used for the image generation. */ eta?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The URL of the IC Light model image to use for the image generation. */ ic_light_image_url?: string | Blob | File; /** * The URL of the IC Light model background image to use for the image generation. * Make sure to use a background compatible with the model. */ ic_light_model_background_image_url?: string | Blob | File; /** * The URL of the IC Light model to use for the image generation. */ ic_light_model_url?: string | Blob | File; /** * The path to the image encoder model to use for the image generation. */ image_encoder_path?: string; /** * The subfolder of the image encoder model to use for the image generation.
*/ image_encoder_subfolder?: string; /** * The weight name of the image encoder model to use for the image generation. Default value: `"pytorch_model.bin"` */ image_encoder_weight_name?: string; /** * The format of the generated image. Default value: `"png"` */ image_format?: "jpeg" | "png"; /** * URL of image to use for image to image/inpainting. */ image_url?: string | Blob | File; /** * The IP adapter to use for the image generation. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ ip_adapter?: Array<unknown>; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ loras?: Array<unknown>; /** * URL of black-and-white image to use as mask during inpainting. */ mask_url?: string | Blob | File; /** * URL or HuggingFace ID of the base model to generate the image. */ model_name: string; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The amount of noise to add to noise image for image. Only used if the image_url is provided. 1.0 is complete noise and 0 is no noise. Default value: `0.5` */ noise_strength?: number; /** * Number of images to generate in one request. Note that the higher the batch size, * the longer it will take to generate the images. Default value: `1` */ num_images?: number; /** * Increasing the amount of steps tells Stable Diffusion that it should take more steps * to generate your final result which can increase the amount of detail in your image. Default value: `30` */ num_inference_steps?: number; /** * The type of prediction to use for the image generation. * The `epsilon` is the default. Default value: `"epsilon"` */ prediction_type?: "v_prediction" | "epsilon"; /** * The prompt to use for generating the image. Be as descriptive as possible for best results.
*/ prompt: string; /** * If set to true, the prompt weighting syntax will be used. * Additionally, this will lift the 77 token limit by averaging embeddings. */ prompt_weighting?: boolean; /** * Whether to set the rescale_betas_snr_zero option or not for the sampler */ rescale_betas_snr_zero?: boolean; /** * Scheduler / sampler to use for the image denoising process. */ scheduler?: "DPM++ 2M" | "DPM++ 2M Karras" | "DPM++ 2M SDE" | "DPM++ 2M SDE Karras" | "Euler" | "Euler A" | "Euler (trailing timesteps)" | "LCM" | "LCM (trailing timesteps)" | "DDIM" | "TCD"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method. * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter. * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set. */ sigmas?: SigmasInput; /** * The size of the tiles to be used for the image generation. Default value: `4096` */ tile_height?: number; /** * The stride of the tiles to be used for the image generation. Default value: `2048` */ tile_stride_height?: number; /** * The stride of the tiles to be used for the image generation. Default value: `2048` */ tile_stride_width?: number; /** * The size of the tiles to be used for the image generation. Default value: `4096` */ tile_width?: number; /** * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method. * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter. 
* If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `sigmas` is set. */ timesteps?: TimestepsInput; /** * URL or HuggingFace ID of the custom U-Net model to use for the image generation. */ unet_name?: string; /** * The variant of the model to use for huggingface models, e.g. 'fp16'. */ variant?: string; }; export type loraInput = { /** * Skips part of the image generation process, leading to slightly different results. * This means the image renders faster, too. */ clip_skip?: number; /** * If set to true, the controlnet will be applied to only the conditional predictions. */ controlnet_guess_mode?: boolean; /** * The control nets to use for the image generation. You can use any number of control nets * and they will be applied to the image at the specified timesteps. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ controlnets?: Array<unknown>; /** * If set to true, the latents will be saved for debugging. */ debug_latents?: boolean; /** * If set to true, the latents will be saved for debugging per pass. */ debug_per_pass_latents?: boolean; /** * The embeddings to use for the image generation. Only a single embedding is supported at the moment. * The embeddings will be used to map the tokens in the prompt to the embedding weights. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ embeddings?: Array<unknown>; /** * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** * The eta value to be used for the image generation. */ eta?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The URL of the IC Light model image to use for the image generation. */ ic_light_image_url?: string | Blob | File; /** * The URL of the IC Light model background image to use for the image generation. * Make sure to use a background compatible with the model.
*/ ic_light_model_background_image_url?: string | Blob | File; /** * The URL of the IC Light model to use for the image generation. */ ic_light_model_url?: string | Blob | File; /** * The path to the image encoder model to use for the image generation. */ image_encoder_path?: string; /** * The subfolder of the image encoder model to use for the image generation. */ image_encoder_subfolder?: string; /** * The weight name of the image encoder model to use for the image generation. Default value: `"pytorch_model.bin"` */ image_encoder_weight_name?: string; /** * The format of the generated image. Default value: `"png"` */ image_format?: "jpeg" | "png"; /** * The size of the generated image. You can choose between some presets or custom height and width * that **must be multiples of 8**. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The IP adapter to use for the image generation. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ ip_adapter?: Array<unknown>; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. * NOTE(review): element type was missing in this declaration; `unknown` is a placeholder, TODO restore the concrete item type. */ loras?: Array<unknown>; /** * URL or HuggingFace ID of the base model to generate the image. */ model_name: string; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * Number of images to generate in one request. Note that the higher the batch size, * the longer it will take to generate the images. Default value: `1` */ num_images?: number; /** * Increasing the amount of steps tells Stable Diffusion that it should take more steps * to generate your final result which can increase the amount of detail in your image.
Default value: `30` */ num_inference_steps?: number; /** * The type of prediction to use for the image generation. * The `epsilon` is the default. Default value: `"epsilon"` */ prediction_type?: "v_prediction" | "epsilon"; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * If set to true, the prompt weighting syntax will be used. * Additionally, this will lift the 77 token limit by averaging embeddings. */ prompt_weighting?: boolean; /** * Whether to set the rescale_betas_snr_zero option or not for the sampler */ rescale_betas_snr_zero?: boolean; /** * Scheduler / sampler to use for the image denoising process. */ scheduler?: "DPM++ 2M" | "DPM++ 2M Karras" | "DPM++ 2M SDE" | "DPM++ 2M SDE Karras" | "Euler" | "Euler A" | "Euler (trailing timesteps)" | "LCM" | "LCM (trailing timesteps)" | "DDIM" | "TCD"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method. * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter. * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set. */ sigmas?: SigmasInput; /** * The size of the tiles to be used for the image generation. Default value: `4096` */ tile_height?: number; /** * The stride of the tiles to be used for the image generation. Default value: `2048` */ tile_stride_height?: number; /** * The stride of the tiles to be used for the image generation. Default value: `2048` */ tile_stride_width?: number; /** * The size of the tiles to be used for the image generation. 
Default value: `4096` */ tile_width?: number; /** * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method. * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter. * If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `sigmas` is set. */ timesteps?: TimestepsInput; /** * URL or HuggingFace ID of the custom U-Net model to use for the image generation. */ unet_name?: string; /** * The variant of the model to use for huggingface models, e.g. 'fp16'. */ variant?: string; }; export type LoraInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The guidance scale to use for the image generation. Default value: `4` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The LoRAs to use for the image generation. You can use up to 3 LoRAs * and they will be merged together to generate the final image. */ loras?: Array; /** * The negative prompt to generate an image from. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * The prompt to generate an image from. 
*/ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type LoRAInput = { /** * URL, HuggingFace repo ID (owner/repo) to lora weights. */ path: string; /** * Scale factor for LoRA application (0.0 to 4.0). Default value: `1` */ scale?: number; }; export type loraOutput = { /** * The latents saved for debugging. */ debug_latents: File; /** * The latents saved for debugging per pass. */ debug_per_pass_latents: File; /** * Whether the generated images contain NSFW concepts. */ has_nsfw_concepts: Array; /** * The generated image files info. */ images: Array; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; }; export type LoraWeight = { /** * If set to true, the embedding will be forced to be used. */ force?: boolean; /** * URL or the path to the LoRA weights. Or HF model name. */ path: string; /** * The scale of the LoRA weight. This is used to scale the LoRA weight * before merging it with the base model. Default value: `1` */ scale?: number; }; export type LoRAWeight = { /** * URL or the path to the LoRA weights. */ path: string; /** * The scale of the LoRA weight. This is used to scale the LoRA weight * before merging it with the base model. Default value: `1` */ scale?: number; /** * Specifies the transformer to load the lora weight into. 'high' loads into the high-noise transformer, 'low' loads it into the low-noise transformer, while 'both' loads the LoRA into both transformers. Default value: `"high"` */ transformer?: "high" | "low" | "both"; /** * Name of the LoRA weight. 
Used only if `path` is a Hugging Face repository, and required only if you have more than 1 safetensors file in the repo. */
  weight_name?: string;
};

/**
 * Loudness normalization settings: on/off switch plus loudness, peak and range targets.
 */
export type LoudnessNormalizationSetting = {
  /** Enables loudness normalization for the audio. Default value: `true` */
  enabled?: boolean;
  /** Target loudness in LUFS (default -18.0). Default value: `-18` */
  target_loudness?: number;
  /** Target peak level in dBTP (default -0.5). Default value: `-0.5` */
  target_peak?: number;
  /** Target loudness range in LU (default 8.0). Default value: `8` */
  target_range?: number;
};

/**
 * Input parameters for normalizing the loudness of an audio file.
 */
export type LoudnormInput = {
  /** URL of the audio file to normalize. */
  audio_url: string | Blob | File;
  /** Treat mono input files as dual-mono for correct EBU R128 measurement on stereo systems. */
  dual_mono?: boolean;
  /** Integrated loudness target in LUFS. Default value: `-18` */
  integrated_loudness?: number;
  /**
   * Use linear normalization mode (single-pass). If false, uses dynamic mode
   * (two-pass for better quality).
   */
  linear?: boolean;
  /** Loudness range target in LU. Default value: `7` */
  loudness_range?: number;
  /** Measured integrated loudness of input file in LUFS. Required for linear mode. */
  measured_i?: number;
  /** Measured loudness range of input file in LU. Required for linear mode. */
  measured_lra?: number;
  /** Measured threshold of input file in LUFS. Required for linear mode. */
  measured_thresh?: number;
  /** Measured true peak of input file in dBTP. Required for linear mode. */
  measured_tp?: number;
  /** Offset gain in dB applied before the true-peak limiter. */
  offset?: number;
  /** Return loudness measurement summary with the normalized audio. */
  print_summary?: boolean;
  /** Maximum true peak in dBTP. Default value: `-0.1` */
  true_peak?: number;
};

/**
 * Result of loudness normalization: the normalized audio and an optional summary.
 */
export type LoudnormOutput = {
  /** Normalized audio file. */
  audio: File;
  /** Structured loudness measurement summary (if requested). */
  summary?: LoudnormSummary;
};

/**
 * Loudness measurements for the input and output of a normalization run.
 */
export type LoudnormSummary = {
  /** Input integrated loudness in LUFS. */
  input_integrated?: number;
  /** Input loudness range in LU. */
  input_lra?: number;
  /** Input threshold in LUFS. */
  input_threshold?: number;
  /** Input true peak in dBTP. */
  input_true_peak?: number;
  /** Type of normalization applied (Dynamic/Linear). */
  normalization_type?: string;
  /** Output integrated loudness in LUFS. */
  output_integrated?: number;
  /** Output loudness range in LU. */
  output_lra?: number;
  /** Output threshold in LUFS. */
  output_threshold?: number;
  /** Output true peak in dBTP. */
  output_true_peak?: number;
  /** Target offset in LU. */
  target_offset?: number;
};

/**
 * Input parameters for generating a video from an audio file, optionally guided
 * by a first-frame image and/or a text prompt.
 */
export type Ltx23AudioToVideoInput = {
  /**
   * The aspect ratio of the generated video. If 'auto', the aspect ratio will be determined
   * automatically based on the input image, or defaults to 16:9 if no image is provided.
   * Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "16:9" | "9:16";
  /**
   * URL of the audio file to generate a video from. Duration must be between 2 and 20 seconds.
   * Must be publicly accessible or base64 data URI.
   */
  audio_url: string | Blob | File;
  /**
   * Guidance scale for video generation. Higher values make the output more closely follow
   * the prompt. Defaults to 5 for text-to-video, or 9 when providing an image.
   */
  guidance_scale?: number;
  /** URL of an image to use as the first frame of the video. If not provided, prompt is required. */
  image_url?: string | Blob | File;
  /**
   * Text description of how the video should be generated. Required if image_url is not provided.
   * When image_url is provided, this describes how the image should be animated.
   */
  prompt?: string;
};

/**
 * Input parameters for generating a video from a start image and a prompt.
 */
export type Ltx23ImageToVideoInput = {
  /** * The aspect ratio of the generated video.
If 'auto', the aspect ratio will be determined automatically based on the input image. Default value: `"auto"` */
  aspect_ratio?: "auto" | "16:9" | "9:16";
  /** Duration of the generated video in seconds. Default value: `"6"` */
  duration?: "6" | "8" | "10";
  /**
   * URL of the end image for the generated video. When provided, a transition video
   * between the start and end frames is generated.
   */
  end_image_url?: string | Blob | File;
  /** Frames per second of the generated video. Default value: `"25"` */
  fps?: "24" | "25" | "48" | "50";
  /** Whether to generate audio for the generated video. Default value: `true` */
  generate_audio?: boolean;
  /** URL of the start image for the generated video. */
  image_url: string | Blob | File;
  /** Prompt for the generated video. */
  prompt: string;
  /** Resolution of the generated video. Default value: `"1080p"` */
  resolution?: "1080p" | "1440p" | "2160p";
};

/**
 * Input parameters for generating a video from a text prompt.
 */
export type Ltx23TextToVideoInput = {
  /** Aspect ratio of the generated video. Default value: `"16:9"` */
  aspect_ratio?: "16:9" | "9:16";
  /** Duration of the generated video in seconds. Default value: `"6"` */
  duration?: "6" | "8" | "10";
  /** Frames per second of the generated video. Default value: `"25"` */
  fps?: "24" | "25" | "48" | "50";
  /** Whether to generate audio for the generated video. Default value: `true` */
  generate_audio?: boolean;
  /** Prompt for the generated video. */
  prompt: string;
  /** Resolution of the generated video. Default value: `"1080p"` */
  resolution?: "1080p" | "1440p" | "2160p";
};

/**
 * Input parameters for generating a video from an audio file, optionally guided
 * by a first-frame image and/or a text prompt.
 */
export type Ltx2AudioToVideoInput = {
  /**
   * URL of the audio file to generate a video from. Duration must be between 2 and 20 seconds.
   * Must be publicly accessible or base64 data URI.
   */
  audio_url: string | Blob | File;
  /**
   * Guidance scale for video generation. Higher values make the output more closely follow
   * the prompt. Defaults to 5 for text-to-video, or 9 when providing an image.
   */
  guidance_scale?: number;
  /** URL of an image to use as the first frame of the video. If not provided, prompt is required. */
  image_url?: string | Blob | File;
  /**
   * Text description of how the video should be generated. Required if image_url is not provided.
   * When image_url is provided, this describes how the image should be animated.
   */
  prompt?: string;
};

/**
 * Input parameters for audio-conditioned video generation with camera LoRA,
 * start/end image conditioning and video encoding options.
 */
export type LTX2AudioToVideoInput = {
  /** Acceleration level to use. Default value: `"regular"` */
  acceleration?: "none" | "regular" | "high" | "full";
  /**
   * Audio conditioning strength. Values below 1.0 will allow the model to change the audio,
   * while a value of exactly 1.0 will use the input audio without modification.
   * Default value: `1`
   */
  audio_strength?: number;
  /** URL of the audio to generate the video from. */
  audio_url: string | Blob | File;
  /**
   * Camera LoRA to use. This allows you to control the camera movement of the generated video
   * more accurately than just prompting the model to move the camera. Default value: `"none"`
   */
  camera_lora?:
    | "dolly_in"
    | "dolly_out"
    | "dolly_left"
    | "dolly_right"
    | "jib_up"
    | "jib_down"
    | "static"
    | "none";
  /**
   * Scale of the camera LoRA to use. This allows you to control the camera movement of the
   * generated video more accurately than just prompting the model to move the camera.
   * Default value: `1`
   */
  camera_lora_scale?: number;
  /** Whether to enable prompt expansion. Default value: `true` */
  enable_prompt_expansion?: boolean;
  /** Whether to enable the safety checker. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Strength of the end image to use for the video generation. Default value: `1` */
  end_image_strength?: number;
  /** URL of the image to use as the end of the video. */
  end_image_url?: string | Blob | File;
  /** Frames per second of the generated video. Default value: `25` */
  fps?: number;
  /** * The guidance scale to use.
Default value: `3` */
  guidance_scale?: number;
  /** Strength of the image to use for the video generation. Default value: `1` */
  image_strength?: number;
  /** Optional URL of an image to use as the first frame of the video. */
  image_url?: string | Blob | File;
  /**
   * When enabled, the number of frames will be calculated based on the audio duration and FPS.
   * When disabled, use the specified num_frames. Default value: `true`
   */
  match_audio_length?: boolean;
  /**
   * Negative prompt to generate the video from.
   * Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."`
   */
  negative_prompt?: string;
  /** Number of frames to generate. Default value: `121` */
  num_frames?: number;
  /** Number of inference steps to use. Default value: `40` */
  num_inference_steps?: number;
  /** Whether to preprocess the audio before using it as conditioning. Default value: `true` */
  preprocess_audio?: boolean;
  /** Prompt to generate the video from. */
  prompt: string;
  /** Seed for the random number generator. */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be
   * available in the request history.
   */
  sync_mode?: boolean;
  /**
   * Whether to use multi-scale generation. If True, the model will generate the video at a
   * smaller scale first, then use the smaller video to guide the generation of a video at or
   * above your requested size. This results in better coherence and details.
   * Default value: `true`
   */
  use_multiscale?: boolean;
  /** Output type of the generated video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** Quality of the generated video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
  /**
   * Size of the generated video. Use 'auto' to match the input image dimensions if provided.
   * Default value: `landscape_4_3`
   */
  video_size?:
    | ImageSize
    | "auto"
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** Write mode of the generated video. Default value: `"balanced"` */
  video_write_mode?: "fast" | "balanced" | "small";
};

/**
 * Input parameters for audio-conditioned video generation (distilled variant, per the type name).
 */
export type LTX2DistilledAudioToVideoInput = {
  /** Acceleration level to use. Default value: `"none"` */
  acceleration?: "none" | "regular" | "high" | "full";
  /**
   * Audio conditioning strength. Values below 1.0 will allow the model to change the audio,
   * while a value of exactly 1.0 will use the input audio without modification.
   * Default value: `1`
   */
  audio_strength?: number;
  /** URL of the audio to generate the video from. */
  audio_url: string | Blob | File;
  /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera.
Default value: `"none"` */
  camera_lora?:
    | "dolly_in"
    | "dolly_out"
    | "dolly_left"
    | "dolly_right"
    | "jib_up"
    | "jib_down"
    | "static"
    | "none";
  /**
   * Scale of the camera LoRA to use. This allows you to control the camera movement of the
   * generated video more accurately than just prompting the model to move the camera.
   * Default value: `1`
   */
  camera_lora_scale?: number;
  /** Whether to enable prompt expansion. Default value: `true` */
  enable_prompt_expansion?: boolean;
  /** Whether to enable the safety checker. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Strength of the end image to use for the video generation. Default value: `1` */
  end_image_strength?: number;
  /** URL of the image to use as the end of the video. */
  end_image_url?: string | Blob | File;
  /** Frames per second of the generated video. Default value: `25` */
  fps?: number;
  /** Strength of the image to use for the video generation. Default value: `1` */
  image_strength?: number;
  /** Optional URL of an image to use as the first frame of the video. */
  image_url?: string | Blob | File;
  /**
   * When enabled, the number of frames will be calculated based on the audio duration and FPS.
   * When disabled, use the specified num_frames. Default value: `true`
   */
  match_audio_length?: boolean;
  /**
   * Negative prompt to generate the video from.
   * Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."`
   */
  negative_prompt?: string;
  /** Number of frames to generate. Default value: `121` */
  num_frames?: number;
  /** Whether to preprocess the audio before using it as conditioning. Default value: `true` */
  preprocess_audio?: boolean;
  /** Prompt to generate the video from. */
  prompt: string;
  /** Seed for the random number generator. */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be
   * available in the request history.
   */
  sync_mode?: boolean;
  /**
   * Whether to use multi-scale generation. If True, the model will generate the video at a
   * smaller scale first, then use the smaller video to guide the generation of a video at or
   * above your requested size. This results in better coherence and details.
   * Default value: `true`
   */
  use_multiscale?: boolean;
  /** Output type of the generated video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** Quality of the generated video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
  /**
   * Size of the generated video. Use 'auto' to match the input image dimensions if provided.
   * Default value: `landscape_4_3`
   */
  video_size?:
    | ImageSize
    | "auto"
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** Write mode of the generated video. Default value: `"balanced"` */
  video_write_mode?: "fast" | "balanced" | "small";
};

/**
 * Input parameters for extending an existing video forward or backward.
 */
export type LTX2DistilledExtendVideoInput = {
  /** Acceleration level to use. Default value: `"none"` */
  acceleration?: "none" | "regular" | "high" | "full";
  /**
   * Audio conditioning strength. Lower values represent more freedom given to the model
   * to change the audio content. Default value: `1`
   */
  audio_strength?: number;
  /**
   * Camera LoRA to use. This allows you to control the camera movement of the generated video
   * more accurately than just prompting the model to move the camera. Default value: `"none"`
   */
  camera_lora?:
    | "dolly_in"
    | "dolly_out"
    | "dolly_left"
    | "dolly_right"
    | "jib_up"
    | "jib_down"
    | "static"
    | "none";
  /**
   * Scale of the camera LoRA to use. This allows you to control the camera movement of the
   * generated video more accurately than just prompting the model to move the camera.
   * Default value: `1`
   */
  camera_lora_scale?: number;
  /** Whether to enable prompt expansion. Default value: `true` */
  enable_prompt_expansion?: boolean;
  /** Whether to enable the safety checker. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Strength of the end image to use for the video generation. Default value: `1` */
  end_image_strength?: number;
  /** URL of the image to use as the end of the extended video. */
  end_image_url?: string | Blob | File;
  /** * Direction to extend the video.
'forward' extends from the end of the video, 'backward' extends from the beginning. Default value: `"forward"` */
  extend_direction?: "forward" | "backward";
  /** Frames per second of the generated video. Default value: `25` */
  fps?: number;
  /** Whether to generate audio for the video. Default value: `true` */
  generate_audio?: boolean;
  /**
   * When true, match the output FPS to the input video's FPS instead of using the default
   * target FPS. Default value: `true`
   */
  match_input_fps?: boolean;
  /**
   * Negative prompt to generate the video from.
   * Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."`
   */
  negative_prompt?: string;
  /** Number of frames to use as context for the extension. Default value: `25` */
  num_context_frames?: number;
  /** Number of frames to generate. Default value: `121` */
  num_frames?: number;
  /** Prompt to generate the video from. */
  prompt: string;
  /** Seed for the random number generator. */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be
   * available in the request history.
   */
  sync_mode?: boolean;
  /**
   * Whether to use multi-scale generation. If True, the model will generate the video at a
   * smaller scale first, then use the smaller video to guide the generation of a video at or
   * above your requested size. This results in better coherence and details.
   * Default value: `true`
   */
  use_multiscale?: boolean;
  /** Output type of the generated video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** Quality of the generated video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
  /** Size of the generated video. Default value: `auto` */
  video_size?:
    | ImageSize
    | "auto"
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * Video conditioning strength. Lower values represent more freedom given to the model
   * to change the video content. Default value: `1`
   */
  video_strength?: number;
  /** URL of the video to extend. */
  video_url: string | Blob | File;
  /** Write mode of the generated video. Default value: `"balanced"` */
  video_write_mode?: "fast" | "balanced" | "small";
};

/**
 * Input parameters for generating a video from a start image, with an optional end image.
 */
export type LTX2DistilledImageToVideoInput = {
  /** Acceleration level to use. Default value: `"none"` */
  acceleration?: "none" | "regular" | "high" | "full";
  /**
   * Camera LoRA to use. This allows you to control the camera movement of the generated video
   * more accurately than just prompting the model to move the camera. Default value: `"none"`
   */
  camera_lora?:
    | "dolly_in"
    | "dolly_out"
    | "dolly_left"
    | "dolly_right"
    | "jib_up"
    | "jib_down"
    | "static"
    | "none";
  /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera.
Default value: `1` */
  camera_lora_scale?: number;
  /** Whether to enable prompt expansion. Default value: `true` */
  enable_prompt_expansion?: boolean;
  /** Whether to enable the safety checker. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Strength of the end image to use for the video generation. Default value: `1` */
  end_image_strength?: number;
  /** URL of the image to use as the end of the video. */
  end_image_url?: string | Blob | File;
  /** Frames per second of the generated video. Default value: `25` */
  fps?: number;
  /** Whether to generate audio for the video. Default value: `true` */
  generate_audio?: boolean;
  /** Strength of the image to use for the video generation. Default value: `1` */
  image_strength?: number;
  /** URL of the image to generate the video from. */
  image_url: string | Blob | File;
  /**
   * The direction to interpolate the image sequence in. 'Forward' goes from the start image
   * to the end image, 'Backward' goes from the end image to the start image.
   * Default value: `"forward"`
   */
  interpolation_direction?: "forward" | "backward";
  /**
   * Negative prompt to generate the video from.
   * Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."`
   */
  negative_prompt?: string;
  /** Number of frames to generate. Default value: `121` */
  num_frames?: number;
  /** Prompt used for the generation. */
  prompt: string;
  /** Seed for the random number generator. */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be
   * available in the request history.
   */
  sync_mode?: boolean;
  /**
   * Whether to use multi-scale generation. If True, the model will generate the video at a
   * smaller scale first, then use the smaller video to guide the generation of a video at or
   * above your requested size. This results in better coherence and details.
   * Default value: `true`
   */
  use_multiscale?: boolean;
  /** Output type of the generated video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** Quality of the generated video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
  /** Size of the generated video. Default value: `auto` */
  video_size?:
    | ImageSize
    | "auto"
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** Write mode of the generated video. Default value: `"balanced"` */
  video_write_mode?: "fast" | "balanced" | "small";
};

/**
 * Input parameters for retaking (regenerating) part of an existing video's audio and/or video.
 */
export type LTX2DistilledRetakeVideoInput = {
  /** Acceleration level to use. Default value: `"none"` */
  acceleration?: "none" | "regular" | "high" | "full";
  /**
   * Audio conditioning strength. Lower values represent more freedom given to the model
   * to change the audio content. Default value: `1`
   */
  audio_strength?: number;
  /**
   * Camera LoRA to use. This allows you to control the camera movement of the generated video
   * more accurately than just prompting the model to move the camera. Default value: `"none"`
   */
  camera_lora?:
    | "dolly_in"
    | "dolly_out"
    | "dolly_left"
    | "dolly_right"
    | "jib_up"
    | "jib_down"
    | "static"
    | "none";
  /**
   * Scale of the camera LoRA to use. This allows you to control the camera movement of the
   * generated video more accurately than just prompting the model to move the camera.
   * Default value: `1`
   */
  camera_lora_scale?: number;
  /** Whether to enable prompt expansion. Default value: `true` */
  enable_prompt_expansion?: boolean;
  /** Whether to enable the safety checker. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Frames per second of the generated video. Default value: `25` */
  fps?: number;
  /** Whether to generate audio for the video. Default value: `true` */
  generate_audio?: boolean;
  /**
   * When true, match the output FPS to the input video's FPS instead of using the default
   * target FPS. Default value: `true`
   */
  match_input_fps?: boolean;
  /** * The negative prompt to generate the video from.
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */
  negative_prompt?: string;
  /** Number of frames to use as end context for the retake. Default value: `25` */
  num_end_context_frames?: number;
  /** Number of frames to generate. Default value: `121` */
  num_frames?: number;
  /** Number of frames to use as start context for the retake. Default value: `25` */
  num_start_context_frames?: number;
  /** Prompt to generate the video from. */
  prompt: string;
  /** Retake mode to use for the retake. Default value: `"replace_audio_and_video"` */
  retake_mode?: "replace_audio" | "replace_video" | "replace_audio_and_video";
  /** Seed for the random number generator. */
  seed?: number;
  /** Frame of the video to begin retaking from. Default value: `24` */
  start_frame?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be
   * available in the request history.
   */
  sync_mode?: boolean;
  /**
   * Whether to use multi-scale generation. If True, the model will generate the video at a
   * smaller scale first, then use the smaller video to guide the generation of a video at or
   * above your requested size. This results in better coherence and details.
   * Default value: `true`
   */
  use_multiscale?: boolean;
  /** Output type of the generated video. Default value: `"X264 (.mp4)"` */
  video_output_type?:
    | "X264 (.mp4)"
    | "VP9 (.webm)"
    | "PRORES4444 (.mov)"
    | "GIF (.gif)";
  /** Quality of the generated video. Default value: `"high"` */
  video_quality?: "low" | "medium" | "high" | "maximum";
  /** Size of the generated video. Default value: `auto` */
  video_size?:
    | ImageSize
    | "auto"
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * Video conditioning strength. Lower values represent more freedom given to the model
   * to change the video content. Default value: `1`
   */
  video_strength?: number;
  /** URL of the video to retake. */
  video_url: string | Blob | File;
  /** Write mode of the generated video. Default value: `"balanced"` */
  video_write_mode?: "fast" | "balanced" | "small";
};

export type LTX2DistilledTextToVideoInput = { /** * The acceleration level to use. Default value: `"none"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion.
Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The negative prompt to generate the video from. Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. 
If True, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `landscape_4_3` */ video_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LTX2DistilledVideoToVideoInput = { /** * The acceleration level to use. Default value: `"none"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. Default value: `1` */ audio_strength?: number; /** * An optional URL of an audio to use as the audio for the video. If not provided, any audio present in the input video will be used. */ audio_url?: string | Blob | File; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. 
Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video. */ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The type of IC-LoRA to load. In-Context LoRA weights are used to condition the video based on edge, depth, or pose videos. Only change this from `match_preprocessor` if your videos are already preprocessed (or you are using the detailer.) Default value: `"match_preprocessor"` */ ic_lora?: "match_preprocessor" | "canny" | "depth" | "pose" | "detailer" | "none"; /** * The scale of the IC-LoRA to use. This allows you to control the strength of the IC-LoRA. Default value: `1` */ ic_lora_scale?: number; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * An optional URL of an image to use as the first frame of the video. */ image_url?: string | Blob | File; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * When enabled, the number of frames will be calculated based on the video duration and FPS. When disabled, use the specified num_frames. Default value: `true` */ match_video_length?: boolean; /** * The negative prompt to generate the video from. 
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The preprocessor to use for the video. When a preprocessor is used and `ic_lora` is set to `match_preprocessor`, the IC-LoRA will be loaded based on the preprocessor type. Default value: `"none"` */ preprocessor?: "depth" | "canny" | "pose" | "none"; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If True, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size.
This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `auto` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to generate the video from. */ video_url: string | Blob | File; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type Ltx2ExtendVideoInput = { /** * Number of seconds from the input video to use as context for the extension (minimum 1 second, maximum 20 seconds). If not provided, defaults to maximize available context within the 505 frame limit. */ context?: number; /** * Duration in seconds to extend the video. Maximum 20 seconds. Default value: `5` */ duration?: number; /** * Where to extend the video: 'end' extends at the end, 'start' extends at the beginning. Default value: `"end"` */ mode?: "start" | "end"; /** * Description of what should happen in the extended portion of the video. */ prompt?: string; /** * The URL of the video to extend */ video_url: string | Blob | File; }; export type LTX2ExtendVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. 
Default value: `1` */ audio_strength?: number; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the extended video. */ end_image_url?: string | Blob | File; /** * Direction to extend the video. 'forward' extends from the end of the video, 'backward' extends from the beginning. Default value: `"forward"` */ extend_direction?: "forward" | "backward"; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * The negative prompt to generate the video from. 
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to use as context for the extension. Default value: `25` */ num_context_frames?: number; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If True, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. 
Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `auto` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to extend. */ video_url: string | Blob | File; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; /** * Output payload carrying the prompt, the seed and the generated video. */ export type LTX2ExtendVideoOutput = { /** * The prompt used for the generation. */ prompt: string; /** * The seed used for the random number generator. */ seed: number; /** * The generated video. */ video: VideoFile; }; /** * Input options: a required image and prompt, plus optional duration, fps, audio and resolution settings. */ export type Ltx2ImageToVideoInput = { /** * The duration of the generated video in seconds. Default value: `"6"` */ duration?: "6" | "8" | "10"; /** * The frames per second of the generated video. Default value: `"25"` */ fps?: "25" | "50"; /** * Whether to generate audio for the generated video. Default value: `true` */ generate_audio?: boolean; /** * URL of the image to generate the video from. Must be publicly accessible or base64 data URI. Supports PNG, JPEG, WebP, AVIF, and HEIF formats. */ image_url: string | Blob | File; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the generated video. Default value: `"1080p"` */ resolution?: "1080p" | "1440p" | "2160p"; }; export type LTX2ImageToVideoInput = { /** * The acceleration level to use.
Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video. */ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * The URL of the image to generate the video from. */ image_url: string | Blob | File; /** * The direction to interpolate the image sequence in. 'Forward' goes from the start image to the end image, 'Backward' goes from the end image to the start image. Default value: `"forward"` */ interpolation_direction?: "forward" | "backward"; /** * The negative prompt to generate the video from. 
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The prompt used for the generation. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If True, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. 
Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `auto` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LTX2LoRAAudioToVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Values below 1.0 will allow the model to change the audio, while a value of exactly 1.0 will use the input audio without modification. Default value: `1` */ audio_strength?: number; /** * The URL of the audio to generate the video from. */ audio_url: string | Blob | File; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video. 
*/ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * Optional URL of an image to use as the first frame of the video. */ image_url?: string | Blob | File; /** * The LoRAs to use for the generation. NOTE(review): the element type argument was missing here; `unknown` is a permissive placeholder - narrow it to the LoRA descriptor type once confirmed. */ loras: Array<unknown>; /** * When enabled, the number of frames will be calculated based on the audio duration and FPS. When disabled, use the specified num_frames. Default value: `true` */ match_audio_length?: boolean; /** * The negative prompt to generate the video from. Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use.
Default value: `40` */ num_inference_steps?: number; /** * Whether to preprocess the audio before using it as conditioning. Default value: `true` */ preprocess_audio?: boolean; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If True, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Use 'auto' to match the input image dimensions if provided. Default value: `landscape_4_3` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LTX2LoRADistilledAudioToVideoInput = { /** * The acceleration level to use. Default value: `"none"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Values below 1.0 will allow the model to change the audio, while a value of exactly 1.0 will use the input audio without modification. Default value: `1` */ audio_strength?: number; /** * The URL of the audio to generate the video from. */ audio_url: string | Blob | File; /** * The camera LoRA to use. 
This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video. */ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * Optional URL of an image to use as the first frame of the video. */ image_url?: string | Blob | File; /** * The LoRAs to use for the generation. NOTE(review): the element type argument was missing here; `unknown` is a permissive placeholder - narrow it to the LoRA descriptor type once confirmed. */ loras: Array<unknown>; /** * When enabled, the number of frames will be calculated based on the audio duration and FPS. When disabled, use the specified num_frames. Default value: `true` */ match_audio_length?: boolean; /** * The negative prompt to generate the video from.
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * Whether to preprocess the audio before using it as conditioning. Default value: `true` */ preprocess_audio?: boolean; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If True, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. 
Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Use 'auto' to match the input image dimensions if provided. Default value: `landscape_4_3` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; };
/** Input for LTX2 LoRA (distilled) video extension. Required fields: `loras`, `prompt`, `video_url`. */
export type LTX2LoRADistilledExtendVideoInput = { /** * The acceleration level to use. Default value: `"none"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. Default value: `1` */ audio_strength?: number; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the extended video. */ end_image_url?: string | Blob | File; /** * Direction to extend the video.
'forward' extends from the end of the video, 'backward' extends from the beginning. Default value: `"forward"` */ extend_direction?: "forward" | "backward"; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The LoRAs to use for the generation. NOTE(review): the element type is not visible in this chunk; `unknown` is a placeholder for the bare `Array` that did not type-check. TODO: narrow to the LoRA entry type. */ loras: unknown[]; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * The negative prompt to generate the video from. Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to use as context for the extension. Default value: `25` */ num_context_frames?: number; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The prompt to generate the video from.
*/ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `"auto"` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to extend. */ video_url: string | Blob | File; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; };
/** Input for LTX2 LoRA (distilled) image-to-video generation. Required fields: `image_url`, `loras`, `prompt`. */
export type LTX2LoRADistilledImageToVideoInput = { /** * The acceleration level to use. Default value: `"none"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use.
This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video. */ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * The URL of the image to generate the video from. */ image_url: string | Blob | File; /** * The direction to interpolate the image sequence in. 'Forward' goes from the start image to the end image, 'Backward' goes from the end image to the start image. Default value: `"forward"` */ interpolation_direction?: "forward" | "backward"; /** * The LoRAs to use for the generation. NOTE(review): the element type is not visible in this chunk; `unknown` is a placeholder for the bare `Array` that did not type-check. TODO: narrow to the LoRA entry type. */ loras: unknown[]; /** * The negative prompt to generate the video from.
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The prompt used for the generation. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video.
Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `"auto"` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; };
/** Input for LTX2 LoRA (distilled) video retake. Required fields: `loras`, `prompt`, `video_url`. */
export type LTX2LoRADistilledRetakeVideoInput = { /** * The acceleration level to use. Default value: `"none"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. Default value: `1` */ audio_strength?: number; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The LoRAs to use for the generation. NOTE(review): the element type is not visible in this chunk; `unknown` is a placeholder for the bare `Array` that did not type-check. TODO: narrow to the LoRA entry type. */ loras: unknown[]; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * The negative prompt to generate the video from.
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to use as end context for the retake. Default value: `25` */ num_end_context_frames?: number; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of frames to use as start context for the retake. Default value: `25` */ num_start_context_frames?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The retake mode to use for the retake. Default value: `"replace_audio_and_video"` */ retake_mode?: "replace_audio" | "replace_video" | "replace_audio_and_video"; /** * The seed for the random number generator. */ seed?: number; /** * The frame of the video to begin retaking from. Default value: `24` */ start_frame?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history.
*/ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `"auto"` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to retake. */ video_url: string | Blob | File; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; };
/** Input for LTX2 LoRA (distilled) text-to-video generation. Required fields: `loras`, `prompt`. */
export type LTX2LoRADistilledTextToVideoInput = { /** * The acceleration level to use. Default value: `"none"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion.
Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The LoRAs to use for the generation. NOTE(review): the element type is not visible in this chunk; `unknown` is a placeholder for the bare `Array` that did not type-check. TODO: narrow to the LoRA entry type. */ loras: unknown[]; /** * The negative prompt to generate the video from. Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation.
If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `"landscape_4_3"` */ video_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; };
/** Input for LTX2 LoRA (distilled) video-to-video generation. Required fields: `loras`, `prompt`, `video_url`. */
export type LTX2LoRADistilledVideoToVideoInput = { /** * The acceleration level to use. Default value: `"none"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. Default value: `1` */ audio_strength?: number; /** * An optional URL of an audio to use as the audio for the video. If not provided, any audio present in the input video will be used. */ audio_url?: string | Blob | File; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion.
Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video. */ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The type of IC-LoRA to load. In-Context LoRA weights are used to condition the video based on edge, depth, or pose videos. Only change this from `match_preprocessor` if your videos are already preprocessed (or you are using the detailer.) Default value: `"match_preprocessor"` */ ic_lora?: "match_preprocessor" | "canny" | "depth" | "pose" | "detailer" | "none"; /** * The scale of the IC-LoRA to use. This allows you to control the strength of the IC-LoRA. Default value: `1` */ ic_lora_scale?: number; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * An optional URL of an image to use as the first frame of the video. */ image_url?: string | Blob | File; /** * The LoRAs to use for the generation. NOTE(review): the element type is not visible in this chunk; `unknown` is a placeholder for the bare `Array` that did not type-check. TODO: narrow to the LoRA entry type. */ loras: unknown[]; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * When enabled, the number of frames will be calculated based on the video duration and FPS. When disabled, use the specified num_frames. Default value: `true` */ match_video_length?: boolean; /** * The negative prompt to generate the video from.
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The preprocessor to use for the video. When a preprocessor is used and `ic_lora` is set to `match_preprocessor`, the IC-LoRA will be loaded based on the preprocessor type. Default value: `"none"` */ preprocessor?: "depth" | "canny" | "pose" | "none"; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size.
This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `"auto"` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to generate the video from. */ video_url: string | Blob | File; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; };
/** Input for LTX2 LoRA video extension. Required fields: `loras`, `prompt`, `video_url`. */
export type LTX2LoRAExtendVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. Default value: `1` */ audio_strength?: number; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker.
Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the extended video. */ end_image_url?: string | Blob | File; /** * Direction to extend the video. 'forward' extends from the end of the video, 'backward' extends from the beginning. Default value: `"forward"` */ extend_direction?: "forward" | "backward"; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The LoRAs to use for the generation. NOTE(review): the element type is not visible in this chunk; `unknown` is a placeholder for the bare `Array` that did not type-check. TODO: narrow to the LoRA entry type. */ loras: unknown[]; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * The negative prompt to generate the video from.
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to use as context for the extension. Default value: `25` */ num_context_frames?: number; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details.
Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `"auto"` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to extend. */ video_url: string | Blob | File; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; };
/** Input for LTX2 LoRA image-to-video generation. Required fields: `image_url`, `loras`, `prompt`. */
export type LTX2LoRAImageToVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video.
*/ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * The URL of the image to generate the video from. */ image_url: string | Blob | File; /** * The direction to interpolate the image sequence in. 'Forward' goes from the start image to the end image, 'Backward' goes from the end image to the start image. Default value: `"forward"` */ interpolation_direction?: "forward" | "backward"; /** * The LoRAs to use for the generation. NOTE(review): the element type is not visible in this chunk; `unknown` is a placeholder for the bare `Array` that did not type-check. TODO: narrow to the LoRA entry type. */ loras: unknown[]; /** * The negative prompt to generate the video from. Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string;
/** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The prompt used for the generation. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `"auto"` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; };
/** Input for LTX2 LoRA text-to-video generation. Required fields: `loras`, `prompt`. */
export type LTX2LoRATextToVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use.
This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The LoRAs to use for the generation. NOTE(review): the element type is not visible in this chunk; `unknown` is a placeholder for the bare `Array` that did not type-check. TODO: narrow to the LoRA entry type. */ loras: unknown[]; /** * The negative prompt to generate the video from. Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate.
Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If True, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `landscape_4_3` */ video_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LTX2LoRAVideoToVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. Default value: `1` */ audio_strength?: number; /** * An optional URL of an audio to use as the audio for the video. If not provided, any audio present in the input video will be used. */ audio_url?: string | Blob | File; /** * The camera LoRA to use. 
This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video. */ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The type of IC-LoRA to load. In-Context LoRA weights are used to condition the video based on edge, depth, or pose videos. Only change this from `match_preprocessor` if your videos are already preprocessed (or you are using the detailer). Default value: `"match_preprocessor"` */ ic_lora?: "match_preprocessor" | "canny" | "depth" | "pose" | "detailer" | "none"; /** * The scale of the IC-LoRA to use. This allows you to control the strength of the IC-LoRA. Default value: `1` */ ic_lora_scale?: number; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * An optional URL of an image to use as the first frame of the video. */ image_url?: string | Blob | File; /** * The LoRAs to use for the generation. Required. The element type is `unknown` because no LoRA weight type is visible in this part of the file. TODO: narrow to the declared LoRA type if one exists.
*/ loras: Array<unknown>; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * When enabled, the number of frames will be calculated based on the video duration and FPS. When disabled, use the specified num_frames. Default value: `true` */ match_video_length?: boolean; /** * The negative prompt to generate the video from. Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The preprocessor to use for the video. When a preprocessor is used and `ic_lora` is set to `match_preprocessor`, the IC-LoRA will be loaded based on the preprocessor type.
Default value: `"none"` */ preprocessor?: "depth" | "canny" | "pose" | "none"; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `auto` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to generate the video from. */ video_url: string | Blob | File; /** * The write mode of the generated video.
Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type Ltx2RetakeVideoInput = { /** * The duration of the video to retake, in seconds. Default value: `5` */ duration?: number; /** * The prompt to retake the video with. */ prompt: string; /** * The retake mode to use for the retake. Default value: `"replace_audio_and_video"` */ retake_mode?: "replace_audio" | "replace_video" | "replace_audio_and_video"; /** * The start time of the video to retake, in seconds. */ start_time?: number; /** * The URL of the video to retake. */ video_url: string | Blob | File; }; export type LTX2RetakeVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. Default value: `1` */ audio_strength?: number; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use.
Default value: `3` */ guidance_scale?: number; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * The negative prompt to generate the video from. Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to use as end context for the retake. Default value: `25` */ num_end_context_frames?: number; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The number of frames to use as start context for the retake. Default value: `25` */ num_start_context_frames?: number; /** * The prompt to generate the video from.
*/ prompt: string; /** * The retake mode to use for the retake. Default value: `"replace_audio_and_video"` */ retake_mode?: "replace_audio" | "replace_video" | "replace_audio_and_video"; /** * The seed for the random number generator. */ seed?: number; /** * The frame of the video to begin retaking from. Default value: `24` */ start_frame?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `auto` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to retake. */ video_url: string | Blob | File; /** * The write mode of the generated video.
Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type Ltx2TextToVideoInput = { /** * The duration of the generated video, in seconds. Default value: `"6"` */ duration?: "6" | "8" | "10"; /** * The frames per second of the generated video. Default value: `"25"` */ fps?: "25" | "50"; /** * Whether to generate audio for the generated video. Default value: `true` */ generate_audio?: boolean; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the generated video. Default value: `"1080p"` */ resolution?: "1080p" | "1440p" | "2160p"; }; export type LTX2TextToVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The negative prompt to generate the video from.
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation. If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video.
Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `landscape_4_3` */ video_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type LTX2VideoToVideoInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high" | "full"; /** * Audio conditioning strength. Lower values represent more freedom given to the model to change the audio content. Default value: `1` */ audio_strength?: number; /** * An optional URL of an audio to use as the audio for the video. If not provided, any audio present in the input video will be used. */ audio_url?: string | Blob | File; /** * The camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `"none"` */ camera_lora?: "dolly_in" | "dolly_out" | "dolly_left" | "dolly_right" | "jib_up" | "jib_down" | "static" | "none"; /** * The scale of the camera LoRA to use. This allows you to control the camera movement of the generated video more accurately than just prompting the model to move the camera. Default value: `1` */ camera_lora_scale?: number; /** * Whether to enable prompt expansion. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The strength of the end image to use for the video generation. Default value: `1` */ end_image_strength?: number; /** * The URL of the image to use as the end of the video.
*/ end_image_url?: string | Blob | File; /** * The frames per second of the generated video. Default value: `25` */ fps?: number; /** * Whether to generate audio for the video. Default value: `true` */ generate_audio?: boolean; /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The type of IC-LoRA to load. In-Context LoRA weights are used to condition the video based on edge, depth, or pose videos. Only change this from `match_preprocessor` if your videos are already preprocessed (or you are using the detailer). Default value: `"match_preprocessor"` */ ic_lora?: "match_preprocessor" | "canny" | "depth" | "pose" | "detailer" | "none"; /** * The scale of the IC-LoRA to use. This allows you to control the strength of the IC-LoRA. Default value: `1` */ ic_lora_scale?: number; /** * The strength of the image to use for the video generation. Default value: `1` */ image_strength?: number; /** * An optional URL of an image to use as the first frame of the video. */ image_url?: string | Blob | File; /** * When true, match the output FPS to the input video's FPS instead of using the default target FPS. Default value: `true` */ match_input_fps?: boolean; /** * When enabled, the number of frames will be calculated based on the video duration and FPS. When disabled, use the specified num_frames. Default value: `true` */ match_video_length?: boolean; /** * The negative prompt to generate the video from.
Default value: `"blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."` */ negative_prompt?: string; /** * The number of frames to generate. Default value: `121` */ num_frames?: number; /** * The number of inference steps to use. Default value: `40` */ num_inference_steps?: number; /** * The preprocessor to use for the video. When a preprocessor is used and `ic_lora` is set to `match_preprocessor`, the IC-LoRA will be loaded based on the preprocessor type. Default value: `"none"` */ preprocessor?: "depth" | "canny" | "pose" | "none"; /** * The prompt to generate the video from. */ prompt: string; /** * The seed for the random number generator. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Whether to use multi-scale generation.
If `true`, the model will generate the video at a smaller scale first, then use the smaller video to guide the generation of a video at or above your requested size. This results in better coherence and details. Default value: `true` */ use_multiscale?: boolean; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the generated video. Default value: `"high"` */ video_quality?: "low" | "medium" | "high" | "maximum"; /** * The size of the generated video. Default value: `auto` */ video_size?: ImageSize | "auto" | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Video conditioning strength. Lower values represent more freedom given to the model to change the video content. Default value: `1` */ video_strength?: number; /** * The URL of the video to generate the video from. */ video_url: string | Blob | File; /** * The write mode of the generated video. Default value: `"balanced"` */ video_write_mode?: "fast" | "balanced" | "small"; }; export type Ltx2VideoTrainerInput = { /** * Aspect ratio to use for training. Default value: `"1:1"` */ aspect_ratio?: "16:9" | "1:1" | "9:16"; /** * Normalize audio peak amplitude to a consistent level. Recommended for consistent audio levels across the dataset. Default value: `true` */ audio_normalize?: boolean; /** * When audio duration doesn't match video duration, stretch/compress audio without changing pitch. If disabled, audio is trimmed or padded with silence. Default value: `true` */ audio_preserve_pitch?: boolean; /** * If true, videos will be automatically scaled to the target frame count and fps. This option has no effect on image datasets. */ auto_scale_input?: boolean; /** * When enabled, the trainer returns a downloadable archive of your preprocessed training data for manual inspection.
Use this to verify that your videos, images, and captions were processed correctly before committing to a full training run. */ debug_dataset?: boolean; /** * Probability of conditioning on the first frame during training. Higher values improve image-to-video performance. Default value: `0.5` */ first_frame_conditioning_p?: number; /** * Target frames per second for the video. Default value: `25` */ frame_rate?: number; /** * Whether to generate audio in validation samples. Default value: `true` */ generate_audio_in_validation?: boolean; /** * Learning rate for optimization. Higher values can lead to faster training but may cause overfitting. Default value: `0.0002` */ learning_rate?: number; /** * Number of frames per training sample. Must satisfy frames % 8 == 1 (e.g., 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97). Default value: `89` */ number_of_frames?: number; /** * The number of training steps. Default value: `2000` */ number_of_steps?: number; /** * The rank of the LoRA adaptation. Higher values increase capacity but use more memory. Default value: `"32"` */ rank?: "8" | "16" | "32" | "64" | "128"; /** * Resolution to use for training. Higher resolutions require more memory. Default value: `"medium"` */ resolution?: "low" | "medium" | "high"; /** * The duration threshold in seconds. If a video is longer than this, it will be split into scenes. Default value: `30` */ split_input_duration_threshold?: number; /** * If true, videos above a certain duration threshold will be split into scenes. Default value: `true` */ split_input_into_scenes?: boolean; /** * STG (Spatio-Temporal Guidance) scale. 0.0 disables STG. Recommended value is 1.0. Default value: `1` */ stg_scale?: number; /** * URL to zip archive with videos or images. Try to use at least 10 files, although more is better.
* * **Supported video formats:** .mp4, .mov, .avi, .mkv * **Supported image formats:** .png, .jpg, .jpeg * * Note: The dataset must contain ONLY videos OR ONLY images - mixed datasets are not supported. * * The archive can also contain text files with captions. Each text file should have the same name as the media file it corresponds to. */ training_data_url: string | Blob | File; /** * A phrase that will trigger the LoRA style. Will be prepended to captions during training. Default value: `""` */ trigger_phrase?: string; /** * A list of validation prompts to use during training. When providing an image, _all_ validation inputs must have an image. The element type is `unknown` because the validation entry type is not visible in this part of the file. TODO: narrow to the declared validation type if one exists. */ validation?: Array<unknown>; /** * The aspect ratio to use for validation. Default value: `"1:1"` */ validation_aspect_ratio?: "16:9" | "1:1" | "9:16"; /** * Target frames per second for validation videos. Default value: `25` */ validation_frame_rate?: number; /** * A negative prompt to use for validation. Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"` */ validation_negative_prompt?: string; /** * The number of frames in validation videos. Default value: `89` */ validation_number_of_frames?: number; /** * The resolution to use for validation. Default value: `"high"` */ validation_resolution?: "low" | "medium" | "high"; /** * Enable joint audio-video training. If omitted (default), automatically detects whether input videos have audio. Set to `true` to force audio training, or `false` to disable. */ with_audio?: boolean; }; export type Ltx2VideoTrainerOutput = { /** * Configuration used for setting up inference endpoints. */ config_file: File; /** * A downloadable archive containing the preprocessed training data, including decoded videos and audio. Only present when `debug_dataset` is enabled in the input. */ debug_dataset?: File; /** * URL to the trained LoRA weights (.safetensors). */ lora_file: File; /** * The URL to the validation videos, if any.
*/ video?: File; }; export type LtxVideo13bDevInput = { /** * Aspect ratio of the generated video. Default value: `"16:9"` */ aspect_ratio?: "9:16" | "1:1" | "16:9"; /** * Whether to use a detail pass. If `true`, the model will perform a second pass to refine the video and enhance details. This incurs a 2.0x cost multiplier on the base price. */ enable_detail_pass?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * Whether to expand the prompt using a language model. */ expand_prompt?: boolean; /** * Number of inference steps during the first pass. Default value: `30` */ first_pass_num_inference_steps?: number; /** * The frame rate of the video. Default value: `24` */ frame_rate?: number; /** * LoRA weights to use for generation. The element type is `unknown` because no LoRA weight type is visible in this part of the file. TODO: narrow to the declared LoRA type if one exists. */ loras?: Array<unknown>; /** * Negative prompt for generation. Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"` */ negative_prompt?: string; /** * The number of frames in the video. Default value: `121` */ num_frames?: number; /** * Text prompt to guide generation. */ prompt: string; /** * Resolution of the generated video. Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Whether to reverse the video. */ reverse_video?: boolean; /** * Number of inference steps during the second pass. Default value: `30` */ second_pass_num_inference_steps?: number; /** * The number of inference steps to skip in the initial steps of the second pass. By skipping some steps at the beginning, the second pass can focus on smaller details instead of larger changes. Default value: `17` */ second_pass_skip_initial_steps?: number; /** * Random seed for generation. */ seed?: number; /** * The factor for adaptive instance normalization (AdaIN) applied to generated video chunks after the first. This can help deal with a gradual increase in saturation/contrast in the generated video by normalizing the color distribution across the video.
A high value will ensure the color distribution is more consistent across the video, while a low value will allow for more variation in color distribution. Default value: `0.5` */ temporal_adain_factor?: number; /** * The compression ratio for tone mapping. This is used to compress the dynamic range of the video to improve visual quality. A value of 0.0 means no compression, while a value of 1.0 means maximum compression. */ tone_map_compression_ratio?: number; }; export type LtxVideoImageToVideoInput = { /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The URL of the image to generate the video from. */ image_url: string | Blob | File; /** * The negative prompt to generate the video from. Default value: `"low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly"` */ negative_prompt?: string; /** * The number of inference steps to take. Default value: `30` */ num_inference_steps?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The seed to use for random number generation. */ seed?: number; }; export type LtxVideoInput = { /** * The guidance scale to use. Default value: `3` */ guidance_scale?: number; /** * The negative prompt to generate the video from. Default value: `"low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly"` */ negative_prompt?: string; /** * The number of inference steps to take. Default value: `30` */ num_inference_steps?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The seed to use for random number generation. */ seed?: number; }; export type LtxVideoLoraImageToVideoInput = { /** * The aspect ratio of the video. Default value: `"auto"` */ aspect_ratio?: "16:9" | "1:1" | "9:16" | "auto"; /** * Whether to expand the prompt using the LLM.
*/ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the video. Default value: `25` */ frames_per_second?: number; /** * The URL of the image to use as input. */ image_url: string | Blob | File; /** * The LoRA weights to use for generation. The element type is `unknown` because no LoRA weight type is visible in this part of the file. TODO: narrow to the declared LoRA type if one exists. */ loras?: Array<unknown>; /** * The negative prompt to use. Default value: `"blurry, low quality, low resolution, inconsistent motion, jittery, distorted"` */ negative_prompt?: string; /** * The number of inference steps to use. Default value: `30` */ num_inference_steps?: number; /** * The number of frames in the video. Default value: `89` */ number_of_frames?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the video. Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Whether to reverse the video. */ reverse_video?: boolean; /** * The seed to use for generation. */ seed?: number; }; export type LtxVideoLoraInput = { /** * The aspect ratio of the video. Default value: `"16:9"` */ aspect_ratio?: "16:9" | "1:1" | "9:16"; /** * Whether to expand the prompt using the LLM. */ enable_prompt_expansion?: boolean; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The frame rate of the video. Default value: `25` */ frames_per_second?: number; /** * The LoRA weights to use for generation. The element type is `unknown` because no LoRA weight type is visible in this part of the file. TODO: narrow to the declared LoRA type if one exists. */ loras?: Array<unknown>; /** * The negative prompt to use. Default value: `"blurry, low quality, low resolution, inconsistent motion, jittery, distorted"` */ negative_prompt?: string; /** * The number of inference steps to use. Default value: `30` */ num_inference_steps?: number; /** * The number of frames in the video. Default value: `89` */ number_of_frames?: number; /** * The prompt to generate the video from. */ prompt: string; /** * The resolution of the video.
Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Whether to reverse the video. */ reverse_video?: boolean; /** * The seed to use for generation. */ seed?: number; }; export type LtxVideoTrainerInput = { /** * The aspect ratio to use for training. This is the aspect ratio of the video. Default value: `"1:1"` */ aspect_ratio?: "16:9" | "1:1" | "9:16"; /** * If true, videos will be automatically scaled to the target frame count and fps. This option has no effect on image datasets. */ auto_scale_input?: boolean; /** * The target frames per second for the video. Default value: `25` */ frame_rate?: number; /** * The rate at which the model learns. Higher values can lead to faster training, but over-fitting. Default value: `0.0002` */ learning_rate?: number; /** * The number of frames to use for training. This is the number of frames per second multiplied by the number of seconds. Default value: `81` */ number_of_frames?: number; /** * The number of steps to train for. Default value: `1000` */ number_of_steps?: number; /** * The rank of the LoRA. Default value: `"128"` */ rank?: "8" | "16" | "32" | "64" | "128"; /** * The resolution to use for training. This is the resolution of the video. Default value: `"medium"` */ resolution?: "low" | "medium" | "high"; /** * The duration threshold in seconds. If a video is longer than this, it will be split into scenes. If you provide captions for a split video, the caption will be applied to each scene. If you do not provide captions, scenes will be auto-captioned. Default value: `30` */ split_input_duration_threshold?: number; /** * If true, videos above a certain duration threshold will be split into scenes. If you provide captions for a split video, the caption will be applied to each scene. If you do not provide captions, scenes will be auto-captioned. This option has no effect on image datasets. Default value: `true` */ split_input_into_scenes?: boolean; /** * URL to zip archive with videos or images.
Try to use at least 10 files, although more is better. * * **Supported video formats:** .mp4, .mov, .avi, .mkv * **Supported image formats:** .png, .jpg, .jpeg * * Note: The dataset must contain ONLY videos OR ONLY images - mixed datasets are not supported. * * The archive can also contain text files with captions. Each text file should have the same name as the media file it corresponds to. */ training_data_url: string | Blob | File; /** * The phrase that will trigger the model to generate an image. Default value: `""` */ trigger_phrase?: string; /** * A list of validation prompts to use during training. When providing an image, _all_ validation inputs must have an image. NOTE(review): `Array` is written without an element type argument here, which the TypeScript compiler rejects; the element type is not visible in this part of the file - TODO restore it. */ validation?: Array; /** * The aspect ratio to use for validation. Default value: `"1:1"` */ validation_aspect_ratio?: "16:9" | "1:1" | "9:16"; /** * A negative prompt to use for validation. Default value: `"blurry, low quality, bad quality, out of focus"` */ validation_negative_prompt?: string; /** * The number of frames to use for validation. Default value: `81` */ validation_number_of_frames?: number; /** * The resolution to use for validation. Default value: `"high"` */ validation_resolution?: "low" | "medium" | "high"; /** * If true, the validation videos will be reversed. This is useful for effects that are learned in reverse and then applied in reverse. */ validation_reverse?: boolean; }; export type LtxVideoTrainerOutput = { /** * Configuration used for setting up the inference endpoints. */ config_file: File; /** * URL to the trained LoRA weights. */ lora_file: File; /** * The URL to the validation video. Optional: the field may be absent from the output. */ video?: File; }; export type LtxVideoV095ImageToVideoInput = { /** * Aspect ratio of the generated video (16:9 or 9:16). Default value: `"16:9"` */ aspect_ratio?: "9:16" | "16:9"; /** * Whether to expand the prompt using the model's own capabilities. 
Default value: `true` */ expand_prompt?: boolean; /** * Image URL for Image-to-Video task */ image_url: string | Blob | File; /** * Negative prompt for generation Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"` */ negative_prompt?: string; /** * Number of inference steps Default value: `40` */ num_inference_steps?: number; /** * Text prompt to guide generation */ prompt: string; /** * Resolution of the generated video (480p or 720p). Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Random seed for generation */ seed?: number; }; export type LtxVideoV095Input = { /** * Aspect ratio of the generated video (16:9 or 9:16). Default value: `"16:9"` */ aspect_ratio?: "9:16" | "16:9"; /** * Whether to expand the prompt using the model's own capabilities. Default value: `true` */ expand_prompt?: boolean; /** * Negative prompt for generation Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"` */ negative_prompt?: string; /** * Number of inference steps Default value: `40` */ num_inference_steps?: number; /** * Text prompt to guide generation */ prompt: string; /** * Resolution of the generated video (480p or 720p). Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Random seed for generation */ seed?: number; }; export type lucidfluxInput = { /** * The guidance to use for the diffusion process. Default value: `4` */ guidance?: number; /** * The URL of the image to edit. */ image_url: string | Blob | File; /** * Number of inference steps for sampling. Higher values give better quality but take longer. Default value: `50` */ num_inference_steps?: number; /** * The prompt to edit the image. */ prompt: string; /** * Seed used for random number generation Default value: `42` */ seed?: number; /** * The height of the output image. Default value: `1024` */ target_height?: number; /** * The width of the output image. 
Default value: `1024` */ target_width?: number; }; export type LucyEditDevInput = { /** * Whether to enhance the prompt for better results. Default value: `true` */ enhance_prompt?: boolean; /** * Text description of the desired video content */ prompt: string; /** * If set to true, the function will wait for the video to be generated * and uploaded before returning the response. This will increase the * latency of the function but it allows you to get the video directly * in the response without going through the CDN. Default value: `true` */ sync_mode?: boolean; /** * URL of the video to edit */ video_url: string | Blob | File; }; export type LucyEditProInput = { /** * Whether to enhance the prompt for better results. Default value: `true` */ enhance_prompt?: boolean; /** * Text description of the desired video content */ prompt: string; /** * Resolution of the generated video Default value: `"720p"` */ resolution?: "720p"; /** * If set to true, the function will wait for the video to be generated * and uploaded before returning the response. This will increase the * latency of the function but it allows you to get the video directly * in the response without going through the CDN. Default value: `true` */ sync_mode?: boolean; /** * URL of the video to edit */ video_url: string | Blob | File; }; export type LucyI2vInput = { /** * Aspect ratio of the generated video. Default value: `"16:9"` */ aspect_ratio?: "9:16" | "16:9"; /** * URL of the image to use as the first frame */ image_url: string | Blob | File; /** * Text description of the desired video content */ prompt: string; /** * If set to true, the function will wait for the image to be generated * and uploaded before returning the response. This will increase the * latency of the function but it allows you to get the image directly * in the response without going through the CDN. 
Default value: `true` */ sync_mode?: boolean; }; export type LucyRestyleInput = { /** * Whether to enhance the prompt for better results. Default value: `true` */ enhance_prompt?: boolean; /** * Text description of the desired video content */ prompt: string; /** * Resolution of the generated video Default value: `"720p"` */ resolution?: "720p"; /** * Seed for video generation */ seed?: number; /** * If set to true, the function will wait for the video to be generated * and uploaded before returning the response. This will increase the * latency of the function but it allows you to get the video directly * in the response without going through the CDN. */ sync_mode?: boolean; /** * URL of the video to edit */ video_url: string | Blob | File; }; export type LumaDreamMachineRay2FlashModifyInput = { /** * Optional URL of the first frame image for modification */ image_url?: string | Blob | File; /** * Amount of modification to apply to the video, adhere_1 is the least amount of modification, reimagine_3 is the most Default value: `"flex_1"` */ mode?: "adhere_1" | "adhere_2" | "adhere_3" | "flex_1" | "flex_2" | "flex_3" | "reimagine_1" | "reimagine_2" | "reimagine_3"; /** * Instruction for modifying the video */ prompt?: string; /** * URL of the input video to modify */ video_url: string | Blob | File; }; export type LumaDreamMachineRay2ImageToVideoInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; /** * The duration of the generated video Default value: `"5s"` */ duration?: "5s" | "9s"; /** * Final image to end the video with. Can be used together with image_url. */ end_image_url?: string | Blob | File; /** * Initial image to start the video from. Can be used together with end_image_url. 
*/ image_url?: string | Blob | File; /** * Whether the video should loop (end of video is blended with the beginning) */ loop?: boolean; /** * Text prompt for the request (the schema provides no description for this field). */ prompt: string; /** * The resolution of the generated video (720p costs 2x more, 1080p costs 4x more) Default value: `"540p"` */ resolution?: "540p" | "720p" | "1080p"; }; export type LumaDreamMachineRay2Input = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; /** * The duration of the generated video (9s costs 2x more) Default value: `"5s"` */ duration?: "5s" | "9s"; /** * Whether the video should loop (end of video is blended with the beginning) */ loop?: boolean; /** * Text prompt for the request (the schema provides no description for this field). */ prompt: string; /** * The resolution of the generated video (720p costs 2x more, 1080p costs 4x more) Default value: `"540p"` */ resolution?: "540p" | "720p" | "1080p"; }; export type LumaDreamMachineRay2ReframeInput = { /** * The aspect ratio of the reframed video */ aspect_ratio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; /** * X position of the grid for reframing */ grid_position_x?: number; /** * Y position of the grid for reframing */ grid_position_y?: number; /** * Optional URL of the first frame image for reframing */ image_url?: string | Blob | File; /** * Optional prompt for reframing */ prompt?: string; /** * URL of the input video to reframe */ video_url: string | Blob | File; /** * End X coordinate for reframing */ x_end?: number; /** * Start X coordinate for reframing */ x_start?: number; /** * End Y coordinate for reframing */ y_end?: number; /** * Start Y coordinate for reframing */ y_start?: number; }; export type LumaPhotonInput = { /** * The aspect ratio of the generated image Default value: `"1:1"` */ aspect_ratio?: "16:9" | "9:16" | "1:1" | "4:3" | "3:4" | "21:9" | "9:21"; /** * Text prompt for the request (the schema provides no description for this field). */ prompt: string; }; export type LumaPhotonModifyInput = { /** * The aspect ratio of the reframed image */ aspect_ratio: "1:1" | "16:9" | "9:16" | 
"4:3" | "3:4" | "21:9" | "9:21"; /** * URL of the input image to reframe */ image_url: string | Blob | File; /** * Instruction for modifying the image */ prompt?: string; /** * The strength of the initial image. Higher strength values are corresponding to more influence of the initial image on the output. */ strength: number; }; export type LumaPhotonReframeInput = { /** * The aspect ratio of the reframed image */ aspect_ratio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; /** * X position of the grid for reframing */ grid_position_x?: number; /** * Y position of the grid for reframing */ grid_position_y?: number; /** * URL of the input image to reframe */ image_url: string | Blob | File; /** * Optional prompt for reframing */ prompt?: string; /** * End X coordinate for reframing */ x_end?: number; /** * Start X coordinate for reframing */ x_start?: number; /** * End Y coordinate for reframing */ y_end?: number; /** * Start Y coordinate for reframing */ y_start?: number; }; export type LuminaImageV2Input = { /** * Whether to apply normalization-based guidance scale. Default value: `true` */ cfg_normalization?: boolean; /** * The ratio of the timestep interval to apply normalization-based guidance scale. Default value: `1` */ cfg_trunc_ratio?: number; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. 
moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `30` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The system prompt to use. Default value: `"You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts."` */ system_prompt?: string; }; export type LuxTtsInput = { /** * URL of the reference audio file for voice cloning. The model will mimic the voice characteristics from this audio. */ audio_url: string | Blob | File; /** * Classifier-free guidance scale. Higher values increase adherence to the reference voice at the cost of diversity. Default value: `3` */ guidance_scale?: number; /** * Maximum length of the reference audio to use for voice encoding, in seconds. Longer durations capture more voice characteristics but increase processing time. Default value: `5` */ max_ref_length?: number; /** * Number of flow-matching inference steps. 4 is recommended for best efficiency. Default value: `4` */ num_inference_steps?: number; /** * The text to be converted to speech. */ prompt: string; /** * Random seed for reproducibility. */ seed?: number; }; export type LuxTtsOutput = { /** * The generated speech audio file at 48kHz. 
*/ audio: File; /** * Seed value returned with the output (presumably the seed used for generation; the schema provides no description - TODO confirm). */ seed: number; /** * Timing information returned with the output; its structure is not specified by the schema (typed as `unknown`), so narrow before use. */ timings: unknown; }; export type lynxInput = { /** * Aspect ratio of the generated video (16:9, 9:16, or 1:1) Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "1:1"; /** * Frames per second of the generated video. Must be between 5 and 30. Default value: `16` */ frames_per_second?: number; /** * Classifier-free guidance scale. Higher values give better adherence to the prompt but may decrease quality. Default value: `5` */ guidance_scale?: number; /** * Image guidance scale. Controls how closely the generated video follows the reference image. Higher values increase adherence to the reference image but may decrease quality. Default value: `2` */ guidance_scale_2?: number; /** * The URL of the subject image to be used for video generation */ image_url: string | Blob | File; /** * Identity preservation scale. Controls how closely the generated video preserves the subject's identity from the reference image. Default value: `1` */ ip_scale?: number; /** * Negative prompt to guide what should not appear in the generated video Default value: `"Bright tones, overexposed, blurred background, static, subtitles, style, works, paintings, images, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"` */ negative_prompt?: string; /** * Number of frames in the generated video. Must be between 9 and 100. Default value: `81` */ num_frames?: number; /** * Number of inference steps for sampling. Higher values give better quality but take longer. 
Default value: `50` */ num_inference_steps?: number; /** * Text prompt to guide video generation */ prompt: string; /** * Resolution of the generated video (480p, 580p, or 720p) Default value: `"720p"` */ resolution?: "480p" | "580p" | "720p"; /** * Random seed for reproducibility. If None, a random seed is chosen. */ seed?: number; /** * Reference image scale. Controls the influence of the reference image on the generated video. Default value: `1` */ strength?: number; }; export type lynxOutput = { /** * The seed used for generation */ seed: number; /** * The generated video file */ video: VideoFile; }; export type lyria2Input = { /** * A description of what to exclude from the generated audio Default value: `"low quality"` */ negative_prompt?: string; /** * The text prompt describing the music you want to generate */ prompt: string; /** * A seed for deterministic generation. If provided, the model will attempt to produce the same audio given the same prompt and other parameters. */ seed?: number; }; export type MagiExtendVideoInput = { /** * Aspect ratio of the generated video. If 'auto', the aspect ratio will be determined automatically based on the input image. Default value: `"auto"` */ aspect_ratio?: "auto" | "16:9" | "9:16" | "1:1"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Number of frames to generate. Must be between 96 and 192 (inclusive). Each additional 24 frames beyond 96 incurs an additional billing unit. Default value: `96` */ num_frames?: number; /** * Number of inference steps for sampling. Higher values give better quality but take longer. Default value: `"16"` */ num_inference_steps?: "4" | "8" | "16" | "32" | "64"; /** * The text prompt to guide video generation. */ prompt: string; /** * Resolution of the generated video (480p or 720p). 480p is 0.5 billing units, and 720p is 1 billing unit. 
Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Random seed for reproducibility. If None, a random seed is chosen. */ seed?: number; /** * The frame to begin the generation from, with the remaining frames will be treated as the prefix video. The final video will contain the frames up until this number unchanged, followed by the generated frames. The default start frame is 32 frames before the end of the video, which gives optimal results. */ start_frame?: number; /** * URL of the input video to represent the beginning of the video. If the input video does not match the chosen aspect ratio, it is resized and center cropped. */ video_url: string | Blob | File; }; export type MagiImageToVideoInput = { /** * Aspect ratio of the generated video. If 'auto', the aspect ratio will be determined automatically based on the input image. Default value: `"auto"` */ aspect_ratio?: "auto" | "16:9" | "9:16" | "1:1"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * URL of the input image to represent the first frame of the video. If the input image does not match the chosen aspect ratio, it is resized and center cropped. */ image_url: string | Blob | File; /** * Number of frames to generate. Must be between 96 and 192 (inclusive). Each additional 24 frames beyond 96 incurs an additional billing unit. Default value: `96` */ num_frames?: number; /** * Number of inference steps for sampling. Higher values give better quality but take longer. Default value: `"16"` */ num_inference_steps?: "4" | "8" | "16" | "32" | "64"; /** * The text prompt to guide video generation. */ prompt: string; /** * Resolution of the generated video (480p or 720p). 480p is 0.5 billing units, and 720p is 1 billing unit. Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Random seed for reproducibility. If None, a random seed is chosen. 
*/ seed?: number; }; export type magiInput = { /** * Aspect ratio of the generated video. If 'auto', the aspect ratio will be determined automatically based on the input image. Default value: `"auto"` */ aspect_ratio?: "auto" | "16:9" | "9:16" | "1:1"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Number of frames to generate. Must be between 96 and 192 (inclusive). Each additional 24 frames beyond 96 incurs an additional billing unit. Default value: `96` */ num_frames?: number; /** * Number of inference steps for sampling. Higher values give better quality but take longer. Default value: `"16"` */ num_inference_steps?: "4" | "8" | "16" | "32" | "64"; /** * The text prompt to guide video generation. */ prompt: string; /** * Resolution of the generated video (480p or 720p). 480p is 0.5 billing units, and 720p is 1 billing unit. Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Random seed for reproducibility. If None, a random seed is chosen. */ seed?: number; }; export type magiOutput = { /** * The seed used for generation. */ seed: number; /** * The generated video file. */ video: File; }; export type MakeupApplicationInput = { /** * Aspect ratio for 4K output (default: 3:4 for portraits) */ aspect_ratio?: AspectRatio; /** * Portrait image URL for makeup application */ image_url: string | Blob | File; /** * Default value: `"medium"` */ intensity?: "light" | "medium" | "heavy" | "dramatic"; /** * Default value: `"natural"` */ makeup_style?: "natural" | "glamorous" | "smoky_eyes" | "bold_lips" | "no_makeup" | "remove_makeup" | "dramatic" | "bridal" | "professional" | "korean_style" | "artistic"; }; export type MareyI2vInput = { /** * The dimensions of the generated video in width x height format. Default value: `"1920x1080"` */ dimensions?: "1920x1080" | "1080x1920" | "1152x1152" | "1536x1152" | "1152x1536"; /** * The duration of the generated video. 
Default value: `"5s"` */ duration?: "5s" | "10s"; /** * Controls how strongly the generation is guided by the prompt (0-20). Higher values follow the prompt more closely. */ guidance_scale?: number; /** * The URL of the image to use as the first frame of the video. */ image_url: string | Blob | File; /** * Negative prompt used to guide the model away from undesirable features. Default value: `" low-poly, flat shader, bad rigging, stiff animation, uncanny eyes, low-quality textures, looping glitch, cheap effect, overbloom, bloom spam, default lighting, game asset, stiff face, ugly specular, AI artifacts"` */ negative_prompt?: string; /** * The prompt to generate a video from */ prompt: string; /** * Seed for random number generation. Use -1 for random seed each run. Default value: `-1` */ seed?: number; }; export type MareyPoseTransferInput = { /** * Optional first frame image URL to use as the first frame of the generated video */ first_frame_image_url?: string | Blob | File; /** * Negative prompt used to guide the model away from undesirable features. Default value: `" low-poly, flat shader, bad rigging, stiff animation, uncanny eyes, low-quality textures, looping glitch, cheap effect, overbloom, bloom spam, default lighting, game asset, stiff face, ugly specular, AI artifacts"` */ negative_prompt?: string; /** * The prompt to generate a video from */ prompt: string; /** * Optional reference image URL to use for pose control or as a starting frame */ reference_image_url?: string | Blob | File; /** * Seed for random number generation. Use -1 for random seed each run. Default value: `-1` */ seed?: number; /** * The URL of the video to use as the control video. */ video_url: string | Blob | File; }; export type MareyT2vInput = { /** * The dimensions of the generated video in width x height format. Default value: `"1920x1080"` */ dimensions?: "1920x1080" | "1152x1152" | "1536x1152" | "1152x1536"; /** * The duration of the generated video. 
Default value: `"5s"` */ duration?: "5s" | "10s"; /** * Controls how strongly the generation is guided by the prompt (0-20). Higher values follow the prompt more closely. */ guidance_scale?: number; /** * Negative prompt used to guide the model away from undesirable features. Default value: `" low-poly, flat shader, bad rigging, stiff animation, uncanny eyes, low-quality textures, looping glitch, cheap effect, overbloom, bloom spam, default lighting, game asset, stiff face, ugly specular, AI artifacts"` */ negative_prompt?: string; /** * The prompt to generate a video from */ prompt: string; /** * Seed for random number generation. Use -1 for random seed each run. Default value: `-1` */ seed?: number; }; export type MarigoldDepthMapInput = { /** * Number of predictions to average over. Defaults to `10`. The higher the number, the more accurate the result, but the slower the inference. Default value: `10` */ ensemble_size?: number; /** * Input image url. */ image_url: string | Blob | File; /** * Number of denoising steps. Defaults to `10`. The higher the number, the more accurate the result, but the slower the inference. Default value: `10` */ num_inference_steps?: number; /** * Maximum processing resolution. Defaults to `0`, which means it uses the size of the input image. */ processing_res?: number; }; export type MaskInput = { /** * The URL of the image to remove objects from. */ image_url: string | Blob | File; /** * Amount of pixels to expand the mask by. Range: 0-50 Default value: `15` */ mask_expansion?: number; /** * The URL of the mask image. White pixels (255) indicate areas to remove. */ mask_url: string | Blob | File; /** * Quality preset to use. Default value: `"best_quality"` */ model?: "low_quality" | "medium_quality" | "high_quality" | "best_quality"; }; export type MaskMetadata = { /** * Bounding box for the mask in normalized cxcywh coordinates (presumably center-x, center-y, width, height - TODO confirm ordering). */ box?: Array<number>; /** * Index of the mask inside the model output. */ index: number; /** * Score for this mask. 
*/ score?: number; }; export type MayaBatchInput = { /** * Maximum SNAC tokens per generation. Default value: `2000` */ max_tokens?: number; /** * Output audio format for all generated speech files Default value: `"wav"` */ output_format?: "wav" | "mp3"; /** * List of voice descriptions for each text. Must match the length of texts list. Each describes the voice/character attributes. */ prompts: Array<string>; /** * Repetition penalty for all generations. Default value: `1.1` */ repetition_penalty?: number; /** * Output audio sample rate for all generations. 48 kHz provides higher quality, 24 kHz is faster. Default value: `"48 kHz"` */ sample_rate?: "48 kHz" | "24 kHz"; /** * Sampling temperature for all generations. Default value: `0.4` */ temperature?: number; /** * List of texts to synthesize into speech. You can embed emotion tags in each text using the format `<emotion_tag>`. */ texts: Array<string>; /** * Nucleus sampling parameter for all generations. Default value: `0.9` */ top_p?: number; }; export type MayaBatchOutput = { /** * List of generated audio files */ audios: Array<File>; /** * Average real-time factor across all generations */ average_rtf: number; /** * Duration of each generated audio in seconds */ durations: Array<number>; /** * Sample rate of all generated audio files */ sample_rate: string; /** * Total time taken to generate all audio files in seconds */ total_generation_time: number; }; export type mayaInput = { /** * Maximum number of SNAC tokens to generate (7 tokens per frame). Controls maximum audio length. Default value: `2000` */ max_tokens?: number; /** * Output audio format for the generated speech Default value: `"wav"` */ output_format?: "wav" | "mp3"; /** * Description of the voice/character. Includes attributes like age, accent, pitch, timbre, pacing, tone, and intensity. See examples for format. */ prompt: string; /** * Penalty for repeating tokens. Higher values reduce repetition artifacts. 
Default value: `1.1` */ repetition_penalty?: number; /** * Output audio sample rate. 48 kHz provides higher quality audio, 24 kHz is faster. Default value: `"48 kHz"` */ sample_rate?: "48 kHz" | "24 kHz"; /** * Sampling temperature. Lower values (0.2-0.5) produce more stable/consistent audio. Higher values add variation. Default value: `0.4` */ temperature?: number; /** * The text to synthesize into speech. You can embed emotion tags anywhere in the text using the format `<emotion_tag>`. Available emotions: laugh, laugh_harder, sigh, chuckle, gasp, angry, excited, whisper, cry, scream, sing, snort, exhale, gulp, giggle, sarcastic, curious. Example: 'Hello world! `<excited>` This is amazing!' or 'I can't believe this `<sigh>` happened again.' */ text: string; /** * Nucleus sampling parameter. Controls diversity of token selection. Default value: `0.9` */ top_p?: number; }; export type mayaOutput = { /** * The generated audio file containing the speech (WAV or MP3 format, 24kHz or 48kHz mono depending on upsampler) */ audio: File; /** * Duration of the generated audio in seconds */ duration: number; /** * Time taken to generate the audio in seconds */ generation_time: number; /** * Real-time factor (generation_time / audio_duration). Lower is better. */ rtf: number; /** * Sample rate of the generated audio */ sample_rate: string; }; export type MergeAudioInput = { /** * List of audio file URLs to merge in order */ audio_urls: Array<string>; /** * Optional list of gap durations in seconds between each audio. Length should be len(audio_urls)-1 */ gaps?: Array<number>; }; export type MergeAudioIntoVideoInput = { /** * URL of the audio file to add to the video */ audio_url: string | Blob | File; /** * If true, the original audio from the video is preserved and mixed with the new audio. If false (default), the original audio is replaced. 
*/ keep_original_audio?: boolean; /** * Offset in seconds for when the new audio should start relative to the video */ start_offset?: number; /** * URL of the video file */ video_url: string | Blob | File; }; export type MergeAudiosInput = { /** * List of audio URLs to merge in order. The 0th stream of the audio will be considered as the merge candidate. */ audio_urls: Array<string>; /** * Output format of the combined audio. If not used, will be determined automatically using FFMPEG. Formatted as codec_sample_rate_bitrate. */ output_format?: "mp3_22050_32" | "mp3_44100_32" | "mp3_44100_64" | "mp3_44100_96" | "mp3_44100_128" | "mp3_44100_192" | "pcm_8000" | "pcm_16000" | "pcm_22050" | "pcm_24000" | "pcm_44100" | "pcm_48000" | "ulaw_8000" | "alaw_8000" | "opus_48000_32" | "opus_48000_64" | "opus_48000_96" | "opus_48000_128" | "opus_48000_192"; }; export type MergeImagesInput = { /** * List of image URLs to merge into an array */ image_urls: Array<string>; /** * Output format for processed images Default value: `"png"` */ output_format?: "png" | "jpg" | "jpeg" | "webp"; }; export type MergeTextInput = { /** * Separator to join texts Default value: `"--"` */ separator?: string; /** * List of text strings to merge */ texts: Array<string>; }; export type MergeVideosInput = { /** * Resolution of the final video. Width and height must be between 512 and 2048. */ resolution?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Target FPS for the output video. If not provided, uses the lowest FPS from input videos. */ target_fps?: number; /** * List of video URLs to merge in order */ video_urls: Array<string>; }; export type MergeVideosOutput = { /** * Metadata about the merged video including original video info */ metadata: unknown; /** * Merged video file */ video: File; }; export type MetadataInput = { /** * Whether to extract the start and end frames for videos. Note that when true the request will be slower. 
*/
  extract_frames?: boolean;
  /**
   * URL of the media file (video or audio) to analyze
   */
  media_url: string | Blob | File;
};

export type MetadataOutput = {
  /**
   * Metadata for the analyzed media file (either Video or Audio)
   */
  media: Video | Audio;
};

export type MiDaSInput = {
  /**
   * A parameter for the MiDaS detector Default value: `6.283185307179586`
   */
  a?: number;
  /**
   * Background threshold for the MiDaS detector Default value: `0.1`
   */
  background_threshold?: number;
  /**
   * URL of the image to process
   */
  image_url: string | Blob | File;
};

export type MiDaSOutput = {
  /**
   * Image with MiDaS depth map
   */
  depth_map: Image;
  /**
   * Image with MiDaS normal map
   */
  normal_map: Image;
};

export type MinimaxImage01Input = {
  /**
   * Aspect ratio of the generated image Default value: `"1:1"`
   */
  aspect_ratio?:
    | "1:1"
    | "16:9"
    | "4:3"
    | "3:2"
    | "2:3"
    | "3:4"
    | "9:16"
    | "21:9";
  /**
   * Number of images to generate (1-9) Default value: `1`
   */
  num_images?: number;
  /**
   * Text prompt for image generation (max 1500 characters)
   */
  prompt: string;
  /**
   * Whether to enable automatic prompt optimization
   */
  prompt_optimizer?: boolean;
};

export type MinimaxImage01SubjectReferenceInput = {
  /**
   * Aspect ratio of the generated image Default value: `"1:1"`
   */
  aspect_ratio?:
    | "1:1"
    | "16:9"
    | "4:3"
    | "3:2"
    | "2:3"
    | "3:4"
    | "9:16"
    | "21:9";
  /**
   * URL of the subject reference image to use for consistent character appearance
   */
  image_url: string | Blob | File;
  /**
   * Number of images to generate (1-9) Default value: `1`
   */
  num_images?: number;
  /**
   * Text prompt for image generation (max 1500 characters)
   */
  prompt: string;
  /**
   * Whether to enable automatic prompt optimization
   */
  prompt_optimizer?: boolean;
};

export type MinimaxMusicV2Input = {
  /**
   * Audio configuration settings
   */
  audio_setting?: AudioSetting;
  /**
   * Lyrics of the song. Use `\n` to separate lines. You may add structure tags like [Intro], [Verse], [Chorus], [Bridge], [Outro] to enhance the arrangement. 10-3000 characters.
*/
  lyrics_prompt: string;
  /**
   * A description of the music, specifying style, mood, and scenario. 10-300 characters.
   */
  prompt: string;
};

export type MinimaxSpeech02HdInput = {
  /**
   * Audio configuration settings
   */
  audio_setting?: AudioSetting;
  /**
   * Enhance recognition of specified languages and dialects
   */
  language_boost?:
    | "Chinese" | "Chinese,Yue" | "English" | "Arabic" | "Russian" | "Spanish"
    | "French" | "Portuguese" | "German" | "Turkish" | "Dutch" | "Ukrainian"
    | "Vietnamese" | "Indonesian" | "Japanese" | "Italian" | "Korean" | "Thai"
    | "Polish" | "Romanian" | "Greek" | "Czech" | "Finnish" | "Hindi"
    | "Bulgarian" | "Danish" | "Hebrew" | "Malay" | "Slovak" | "Swedish"
    | "Croatian" | "Hungarian" | "Norwegian" | "Slovenian" | "Catalan" | "Nynorsk"
    | "Afrikaans" | "auto";
  /**
   * Format of the output content (non-streaming only) Default value: `"hex"`
   */
  output_format?: "url" | "hex";
  /**
   * Custom pronunciation dictionary for text replacement
   */
  pronunciation_dict?: PronunciationDict;
  /**
   * Text to convert to speech (max 5000 characters, minimum 1 non-whitespace character)
   */
  text: string;
  /**
   * Voice configuration settings
   */
  voice_setting?: VoiceSetting;
};

export type MinimaxSpeech26HdInput = {
  /**
   * Audio configuration settings
   */
  audio_setting?: AudioSetting;
  /**
   * Enhance recognition of specified languages and dialects
   */
  language_boost?:
    | "Chinese" | "Chinese,Yue" | "English" | "Arabic" | "Russian" | "Spanish"
    | "French" | "Portuguese" | "German" | "Turkish" | "Dutch" | "Ukrainian"
    | "Vietnamese" | "Indonesian" | "Japanese" | "Italian" | "Korean" | "Thai"
    | "Polish" | "Romanian" | "Greek" | "Czech" | "Finnish" | "Hindi"
    | "Bulgarian" | "Danish" | "Hebrew" | "Malay" | "Slovak" | "Swedish"
    | "Croatian" | "Hungarian" | "Norwegian" | "Slovenian" | "Catalan" | "Nynorsk"
    | "Afrikaans" | "auto";
  /**
   * Loudness normalization settings for the audio
   */
  normalization_setting?: LoudnessNormalizationSetting;
  /**
   * Format of the output content (non-streaming only)
Default value: `"hex"` */
  output_format?: "url" | "hex";
  /**
   * Text to convert to speech. Paragraph breaks should be marked with newline characters. **NOTE**: You can customize speech pauses by adding markers in the form `<#x#>`, where `x` is the pause duration in seconds. Valid range: `[0.01, 99.99]`, up to two decimal places. Pause markers must be placed between speakable text segments and cannot be used consecutively.
   */
  prompt: string;
  /**
   * Custom pronunciation dictionary for text replacement
   */
  pronunciation_dict?: PronunciationDict;
  /**
   * Voice configuration settings
   */
  voice_setting?: VoiceSetting;
};

export type MinimaxSpeech28HdInput = {
  /**
   * Audio configuration settings
   */
  audio_setting?: AudioSetting;
  /**
   * Enhance recognition of specified languages and dialects
   */
  language_boost?:
    | "Chinese" | "Chinese,Yue" | "English" | "Arabic" | "Russian" | "Spanish"
    | "French" | "Portuguese" | "German" | "Turkish" | "Dutch" | "Ukrainian"
    | "Vietnamese" | "Indonesian" | "Japanese" | "Italian" | "Korean" | "Thai"
    | "Polish" | "Romanian" | "Greek" | "Czech" | "Finnish" | "Hindi"
    | "Bulgarian" | "Danish" | "Hebrew" | "Malay" | "Slovak" | "Swedish"
    | "Croatian" | "Hungarian" | "Norwegian" | "Slovenian" | "Catalan" | "Nynorsk"
    | "Afrikaans" | "auto";
  /**
   * Loudness normalization settings for the audio
   */
  normalization_setting?: LoudnessNormalizationSetting;
  /**
   * Format of the output content (non-streaming only) Default value: `"hex"`
   */
  output_format?: "url" | "hex";
  /**
   * Text to convert to speech. Use `<#x#>` for pauses (x = 0.01-99.99 seconds). Supports interjection tags: `(laughs)`, `(sighs)`, `(coughs)`, `(clears throat)`, `(gasps)`, `(sniffs)`, `(groans)`, `(yawns)`.
   */
  prompt: string;
  /**
   * Custom pronunciation dictionary for text replacement
   */
  pronunciation_dict?: PronunciationDict;
  /**
   * Voice modification settings to adjust pitch, intensity, and timbre.
*/
  voice_modify?: VoiceModify;
  /**
   * Voice configuration settings
   */
  voice_setting?: VoiceSetting;
};

export type MinimaxVideo01Input = {
  /**
   *
   */
  prompt: string;
  /**
   * Whether to use the model's prompt optimizer Default value: `true`
   */
  prompt_optimizer?: boolean;
};

export type MinimaxVideo01SubjectReferenceInput = {
  /**
   *
   */
  prompt: string;
  /**
   * Whether to use the model's prompt optimizer Default value: `true`
   */
  prompt_optimizer?: boolean;
  /**
   * URL of the subject reference image to use for consistent subject appearance
   */
  subject_reference_image_url: string | Blob | File;
};

export type MinimaxVoiceCloneInput = {
  /**
   * Text validation accuracy threshold (0-1)
   */
  accuracy?: number;
  /**
   * URL of the input audio file for voice cloning. Should be at least 10 seconds
   * long. To retain the voice permanently, use it with a TTS (text-to-speech)
   * endpoint at least once within 7 days. Otherwise, it will be
   * automatically deleted.
   */
  audio_url: string | Blob | File;
  /**
   * TTS model to use for preview. Options: speech-02-hd, speech-02-turbo, speech-01-hd, speech-01-turbo Default value: `"speech-02-hd"`
   */
  model?:
    | "speech-02-hd"
    | "speech-02-turbo"
    | "speech-01-hd"
    | "speech-01-turbo";
  /**
   * Enable volume normalization for the cloned voice
   */
  need_volume_normalization?: boolean;
  /**
   * Enable noise reduction for the cloned voice
   */
  noise_reduction?: boolean;
  /**
   * Text to generate a TTS preview with the cloned voice (optional) Default value: `"Hello, this is a preview of your cloned voice! I hope you like it!"`
   */
  text?: string;
};

export type MinimaxVoiceDesignInput = {
  /**
   * Text for audio preview. Limited to 500 characters. A fee of $30 per 1M characters will be charged for the generation of the preview audio.
   */
  preview_text: string;
  /**
   * Voice description prompt for generating a personalized voice
   */
  prompt: string;
};

export type MiniOutput = {
  /**
   * The generated speech audio file.
*/
  audio: AudioFile;
};

export type MixDehazeNetInput = {
  /**
   * URL of image to be used for image enhancement
   */
  image_url: string | Blob | File;
  /**
   * Model to be used for dehazing Default value: `"indoor"`
   */
  model?: "indoor" | "outdoor";
  /**
   * seed to be used for generation
   */
  seed?: number;
};

export type MLSDInput = {
  /**
   * Distance threshold for the MLSD detector Default value: `0.1`
   */
  distance_threshold?: number;
  /**
   * URL of the image to process
   */
  image_url: string | Blob | File;
  /**
   * Score threshold for the MLSD detector Default value: `0.1`
   */
  score_threshold?: number;
};

export type MmaudioV2TextToAudioInput = {
  /**
   * The strength of Classifier Free Guidance. Default value: `4.5`
   */
  cfg_strength?: number;
  /**
   * The duration of the audio to generate. Default value: `8`
   */
  duration?: number;
  /**
   * Whether to mask away the clip.
   */
  mask_away_clip?: boolean;
  /**
   * The negative prompt to generate the audio for. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of steps to generate the audio for. Default value: `25`
   */
  num_steps?: number;
  /**
   * The prompt to generate the audio for.
   */
  prompt: string;
  /**
   * The seed for the random number generator
   */
  seed?: number;
};

export type MochiV1Input = {
  /**
   * Whether to enable prompt expansion. Default value: `true`
   */
  enable_prompt_expansion?: boolean;
  /**
   * The negative prompt for the video. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The prompt to generate a video from.
   */
  prompt: string;
  /**
   * The seed to use for generating the video.
*/
  seed?: number;
};

export type ModelUrls = {
  /**
   * FBX format 3D model
   */
  fbx?: File;
  /**
   * GLB format 3D model
   */
  glb?: File;
  /**
   * MTL material file for OBJ model
   */
  mtl?: File;
  /**
   * OBJ format 3D model
   */
  obj?: File;
  /**
   * Texture image for the 3D model
   */
  texture?: File;
  /**
   * USDZ format 3D model
   */
  usdz?: File;
};

export type MoondreamBatchedInput = {
  /**
   * List of input prompts and image URLs
   * NOTE(review): element type reconstructed as `MoondreamInputParam` (the prompt + image URL shape declared in this file); confirm against the API schema.
   */
  inputs: Array<MoondreamInputParam>;
  /**
   * Maximum number of new tokens to generate Default value: `64`
   */
  max_tokens?: number;
  /**
   * Model ID to use for inference Default value: `"vikhyatk/moondream2"`
   */
  model_id?: "vikhyatk/moondream2" | "fal-ai/moondream2-docci";
  /**
   * Repetition penalty for sampling Default value: `1`
   */
  repetition_penalty?: number;
  /**
   * Temperature for sampling Default value: `0.2`
   */
  temperature?: number;
  /**
   * Top P for sampling Default value: `1`
   */
  top_p?: number;
};

export type MoondreamBatchedOutput = {
  /**
   * Filenames of the images processed
   */
  filenames?: Array<string>;
  /**
   * List of generated outputs
   * NOTE(review): element type missing in the generated source; `string` is presumed from `MoondreamOutput.output` - confirm against the API schema.
   */
  outputs: Array<string>;
  /**
   * Whether the output is partial
   */
  partial?: boolean;
  /**
   * Timings for different parts of the process
   */
  timings: unknown;
};

export type MoondreamCaptionInput = {
  /**
   * URL of the image to be processed
   */
  image_url: string | Blob | File;
  /**
   * Length of the caption to generate Default value: `"normal"`
   */
  length?: "short" | "normal" | "long";
  /**
   * Sampling temperature to use, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If not set, defaults to 0.
   */
  temperature?: number;
  /**
   * Nucleus sampling probability mass to use, between 0 and 1.
*/
  top_p?: number;
};

export type MoondreamCaptionOutput = {
  /**
   * Reason for finishing the output generation
   */
  finish_reason: string;
  /**
   * Generated caption for the image
   */
  output: string;
  /**
   * Usage information for the request
   */
  usage_info: UsageInfo;
};

export type MoondreamDetectOutput = {
  /**
   * Reason for finishing the output generation
   */
  finish_reason: string;
  /**
   * Image with bounding boxes drawn around detected objects
   */
  image?: ImageFile;
  /**
   * List of detected objects with their bounding boxes
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  objects: Array<unknown>;
  /**
   * Usage information for the request
   */
  usage_info: UsageInfo;
};

export type MoondreamInputParam = {
  /**
   * URL of the image to be processed
   */
  image_url: string | Blob | File;
  /**
   * Prompt to be used for the image Default value: `"Describe this image."`
   */
  prompt?: string;
};

export type MoondreamObjectInput = {
  /**
   * URL of the image to be processed
   */
  image_url: string | Blob | File;
  /**
   * Object to be detected in the image
   */
  object: string;
};

export type MoondreamObjectOutput = {
  /**
   * Image with detected objects
   */
  image: Image;
  /**
   * Objects detected in the image
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  objects: Array<unknown>;
};

export type MoondreamOutput = {
  /**
   * Output for the given query
   */
  output: string;
};

export type MoondreamPointInput = {
  /**
   * URL of the image to be processed
   */
  image_url: string | Blob | File;
  /**
   * Whether to preview the output
   */
  preview?: boolean;
  /**
   * Object to be located in the image
   */
  prompt: string;
};

export type MoondreamPointOutput = {
  /**
   * Reason for finishing the output generation
   */
  finish_reason: string;
  /**
   * Image with points drawn on detected objects
   */
  image?: ImageFile;
  /**
   * List of points marking the detected objects
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  points: Array<unknown>;
  /**
   * Usage information for the request
   */
  usage_info: UsageInfo;
};

export type MoondreamQueryInput = {
  /**
   * URL of the image to be processed
   */
  image_url: string | Blob | File;
  /**
   * Query to be asked in the image
   */
  prompt: string;
  /**
   * Whether to include detailed reasoning behind the
answer Default value: `true` */
  reasoning?: boolean;
  /**
   * Sampling temperature to use, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If not set, defaults to 0.
   */
  temperature?: number;
  /**
   * Nucleus sampling probability mass to use, between 0 and 1.
   */
  top_p?: number;
};

export type MoondreamQueryOutput = {
  /**
   * Reason for finishing the output generation
   */
  finish_reason: string;
  /**
   * Answer to the query about the image
   */
  output: string;
  /**
   * Detailed reasoning behind the answer, if enabled
   */
  reasoning?: string;
  /**
   * Usage information for the request
   */
  usage_info: UsageInfo;
};

export type MoondreamSegementationInput = {
  /**
   * URL of the image to be processed
   */
  image_url: string | Blob | File;
  /**
   * Object to be segmented in the image
   */
  object: string;
  /**
   * Whether to preview the output and return a binary mask of the image
   */
  preview?: boolean;
  /**
   * Sampling settings for the segmentation model
   */
  settings?: SegmentSamplingSettings;
  /**
   * Spatial references to guide the segmentation. By feeding in references you can help the segmentation process. Must be either list of Point object with x and y members, or list of arrays containing either 2 floats (x,y) or 4 floats (x1,y1,x2,y2).
   * **NOTE**: You can also use the [**point endpoint**](https://fal.ai/models/fal-ai/moondream3-preview/point) to get points for the objects, and pass them in here.
   * NOTE(review): the element type was garbled in the generated source (`Array>`); it is reconstructed here from the description above - confirm against the API schema.
   */
  spatial_references?: Array<{ x: number; y: number } | Array<number>>;
};

export type MoondreamSegementationOutput = {
  /**
   * Bounding box of the segmented object. If not detected, will be null.
   */
  bbox?: ObjectBoundingBox;
  /**
   * Reason for finishing the output generation
   */
  finish_reason: string;
  /**
   * Segmentation mask image. If no object detected or preview not requested, will be null.
   */
  image?: ImageFile;
  /**
   * SVG path data representing the segmentation mask. If not detected, will be null.
*/
  path?: string;
  /**
   * Usage information for the request
   */
  usage_info: UsageInfo;
};

export type MulticonditioningVideoInput = {
  /**
   * The aspect ratio of the video. Default value: `"auto"`
   */
  aspect_ratio?: "16:9" | "1:1" | "9:16" | "auto";
  /**
   * Whether to expand the prompt using the LLM.
   */
  enable_prompt_expansion?: boolean;
  /**
   * Whether to enable the safety checker. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The frame rate of the video. Default value: `25`
   */
  frames_per_second?: number;
  /**
   * The image conditions to use for generation.
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  images?: Array<unknown>;
  /**
   * The LoRA weights to use for generation.
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  loras?: Array<unknown>;
  /**
   * The negative prompt to use. Default value: `"blurry, low quality, low resolution, inconsistent motion, jittery, distorted"`
   */
  negative_prompt?: string;
  /**
   * The number of inference steps to use. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * The number of frames in the video. Default value: `89`
   */
  number_of_frames?: number;
  /**
   * The prompt to generate the video from.
   */
  prompt: string;
  /**
   * The resolution of the video. Default value: `"720p"`
   */
  resolution?: "480p" | "720p";
  /**
   * Whether to reverse the video.
   */
  reverse_video?: boolean;
  /**
   * The seed to use for generation.
   */
  seed?: number;
  /**
   * The video conditions to use for generation.
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  videos?: Array<unknown>;
};

export type MultiConditioningVideoInput = {
  /**
   * Aspect ratio of the generated video (16:9 or 9:16). Default value: `"16:9"`
   */
  aspect_ratio?: "9:16" | "16:9";
  /**
   * Whether to expand the prompt using the model's own capabilities.
Default value: `true` */
  expand_prompt?: boolean;
  /**
   * URL of images to use as conditioning
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  images?: Array<unknown>;
  /**
   * Negative prompt for generation Default value: `"worst quality, inconsistent motion, blurry, jittery, distorted"`
   */
  negative_prompt?: string;
  /**
   * Number of inference steps Default value: `40`
   */
  num_inference_steps?: number;
  /**
   * Text prompt to guide generation
   */
  prompt: string;
  /**
   * Resolution of the generated video (480p or 720p). Default value: `"720p"`
   */
  resolution?: "480p" | "720p";
  /**
   * Random seed for generation
   */
  seed?: number;
  /**
   * Videos to use as conditioning
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  videos?: Array<unknown>;
};

export type MultiImageTo3DInput = {
  /**
   * Animation preset ID from Meshy's library (500+ presets). Only used when enable_animation is true. See https://docs.meshy.ai/en/api/animation-library for available action IDs. Default value: `1001`
   */
  animation_action_id?: number;
  /**
   * Apply an animation preset to the rigged model. Requires enable_rigging to be true.
   */
  enable_animation?: boolean;
  /**
   * Generate PBR Maps (metallic, roughness, normal) in addition to base color. Requires should_texture to be true.
   */
  enable_pbr?: boolean;
  /**
   * Automatically rig the generated model as a humanoid character. Includes basic walking and running animations. Best results with humanoid characters that have clearly defined limbs.
   */
  enable_rigging?: boolean;
  /**
   * If set to true, input data will be checked for safety before processing. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * 1 to 4 images for 3D model creation. All images should depict the same object from different angles. Supports .jpg, .jpeg, .png formats, and AVIF/HEIF which will be automatically converted. If more than 4 images are provided, only the first 4 will be used.
   * NOTE(review): element type reconstructed to match this file's single-URL fields; confirm against the API schema.
   */
  image_urls: Array<string | Blob | File>;
  /**
   * Pose mode for the generated model. 'a-pose' generates an A-pose, 't-pose' generates a T-pose, empty string for no specific pose.
Default value: `""` */
  pose_mode?: "a-pose" | "t-pose" | "";
  /**
   * Approximate height of the character in meters. Only used when enable_rigging is true. Default value: `1.7`
   */
  rigging_height_meters?: number;
  /**
   * Whether to enable the remesh phase. When false, returns triangular mesh ignoring topology and target_polycount. Default value: `true`
   */
  should_remesh?: boolean;
  /**
   * Whether to generate textures. False provides mesh without textures for 5 credits, True adds texture generation for additional 10 credits. Default value: `true`
   */
  should_texture?: boolean;
  /**
   * Controls symmetry behavior during model generation. Default value: `"auto"`
   */
  symmetry_mode?: "off" | "auto" | "on";
  /**
   * Target number of polygons in the generated model Default value: `30000`
   */
  target_polycount?: number;
  /**
   * 2D image to guide the texturing process. Requires should_texture to be true.
   */
  texture_image_url?: string | Blob | File;
  /**
   * Text prompt to guide the texturing process. Requires should_texture to be true.
   */
  texture_prompt?: string;
  /**
   * Specify the topology of the generated model. Quad for smooth surfaces, Triangle for detailed geometry. Default value: `"triangle"`
   */
  topology?: "quad" | "triangle";
};

export type MultipleAnglesInput = {
  /**
   * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"regular"`
   */
  acceleration?: "none" | "regular";
  /**
   * Whether to enable the safety checker for the generated image. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale. Controls how closely the model follows the prompt. Default value: `1`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. If not provided, the size of the final input image will be used.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The URL of the image to adjust camera angle for.
*/
  // NOTE(review): element type reconstructed to match this file's single-URL fields; confirm against the API schema.
  image_urls: Array<string | Blob | File>;
  /**
   * The scale factor for the LoRA model. Controls the strength of the camera control effect. Default value: `1.25`
   */
  lora_scale?: number;
  /**
   * Move camera forward (0=no movement, 10=close-up)
   */
  move_forward?: number;
  /**
   * The negative prompt for the generation Default value: `" "`
   */
  negative_prompt?: string;
  /**
   * Number of images to generate Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `6`
   */
  num_inference_steps?: number;
  /**
   * The format of the output image Default value: `"png"`
   */
  output_format?: "png" | "jpeg" | "webp";
  /**
   * Rotate camera left (positive) or right (negative) in degrees. Positive values rotate left, negative values rotate right.
   */
  rotate_right_left?: number;
  /**
   * Random seed for reproducibility. Same seed with same prompt will produce same result.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and won't be saved in history.
   */
  sync_mode?: boolean;
  /**
   * Adjust vertical camera angle (-1=bird's-eye view/looking down, 0=neutral, 1=worm's-eye view/looking up)
   */
  vertical_angle?: number;
  /**
   * Enable wide-angle lens effect
   */
  wide_angle_lens?: boolean;
};

export type MultishotMasterInput = {
  /**
   * Aspect ratio of the generated video. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16";
  /**
   * Enable safety checker for input/output content. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Frames per second of the output video. Default value: `16`
   */
  frames_per_second?: number;
  /**
   * Classifier-free guidance scale. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * Negative prompt describing undesired content in the generated video.
Default value: `"bright colors, overexposed, static, blurred details, subtitles, style, artwork, painting, picture, still, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, malformed limbs, fused fingers, still picture, cluttered background, three legs, many people in the background, walking backwards"` */
  negative_prompt?: string;
  /**
   * Number of denoising steps. Higher values produce better quality but take longer. Default value: `50`
   */
  num_inference_steps?: number;
  /**
   * Global story caption describing the overall scene, subjects, setting, and visual style. This provides inter-shot consistency.
   */
  prompt: string;
  /**
   * Resolution of the generated video. Default value: `"480p"`
   */
  resolution?: "480p" | "720p";
  /**
   * Random seed for reproducibility.
   */
  seed?: number;
  /**
   * List of shots to generate. Each shot has its own caption and frame count. Maximum 5 shots with a combined maximum of 308 frames.
   * NOTE(review): element type missing in the generated source; `unknown` is a placeholder until the schema type is restored.
   */
  shots: Array<unknown>;
};

export type MultiviewTo3dInput = {
  /**
   * Automatically scale the model to real-world dimensions, with the unit in meters. The default value is False.
   */
  auto_size?: boolean;
  /**
   * Back view image of the object.
   */
  back_image_url?: string | Blob | File;
  /**
   * Limits the number of faces on the output model. If this option is not set, the face limit will be adaptively determined.
   */
  face_limit?: number;
  /**
   * Front view image of the object.
   */
  front_image_url: string | Blob | File;
  /**
   * Left view image of the object.
   */
  left_image_url?: string | Blob | File;
  /**
   * Set orientation=align_image to automatically rotate the model to align the original image. The default value is default. Default value: `"default"`
   */
  orientation?: "default" | "align_image";
  /**
   * A boolean option to enable pbr. The default value is True, set False to get a model without pbr. If this option is set to True, texture will be ignored and used as True.
*/
  pbr?: boolean;
  /**
   * Set True to enable quad mesh output (extra $0.05 per generation). If quad=True and face_limit is not set, the default face_limit will be 10000. Note: Enabling this option will force the output to be an FBX model.
   */
  quad?: boolean;
  /**
   * Right view image of the object.
   */
  right_image_url?: string | Blob | File;
  /**
   * This is the random seed for model generation. The seed controls the geometry generation process, ensuring identical models when the same seed is used. This parameter is an integer and is randomly chosen if not set.
   */
  seed?: number;
  /**
   * An option to enable texturing. Default is 'standard', set 'no' to get a model without any textures, and set 'HD' to get a model with hd quality textures. Default value: `"standard"`
   */
  texture?: "no" | "standard" | "HD";
  /**
   * Determines the prioritization of texture alignment in the 3D model. The default value is original_image. Default value: `"original_image"`
   */
  texture_alignment?: "original_image" | "geometry";
  /**
   * This is the random seed for texture generation. Using the same seed will produce identical textures. This parameter is an integer and is randomly chosen if not set. If you want a model with different textures, please use same seed and different texture_seed.
   */
  texture_seed?: number;
};

export type MusePoseInput = {
  /**
   * Classifier free guidance Default value: `3.5`
   */
  cfg?: number;
  /**
   * The frame to align the pose to.
   */
  dwpose_align_frame?: number;
  /**
   * The resolution to use for the pose detection. Default value: `512`
   */
  dwpose_detection_resolution?: number;
  /**
   * The resolution to use for the image during pose calculation. Default value: `720`
   */
  dwpose_image_resolution?: number;
  /**
   * The frames per second of the output video.
   */
  fps?: number;
  /**
   * The height of the output video. Default value: `748`
   */
  height?: number;
  /**
   * URL of the image to animate.
   */
  image_url: string | Blob | File;
  /**
   * The length of the output video.
Default value: `300` */
  length?: number;
  /**
   * The video slice overlap frame number Default value: `4`
   */
  overlap?: number;
  /**
   * The seed to use for the random number generator.
   */
  seed?: number;
  /**
   * Number of input frames to skip. Skipping 1 effectively reduces the fps in half. Default value: `1`
   */
  skip?: number;
  /**
   * The video slice frame number Default value: `48`
   */
  slice?: number;
  /**
   * DDIM sampling steps Default value: `20`
   */
  steps?: number;
  /**
   * The URL of the video to drive the animation
   */
  video_url: string | Blob | File;
  /**
   * The width of the output video. Default value: `748`
   */
  width?: number;
};

export type musetalkInput = {
  /**
   * URL of the audio
   */
  audio_url: string | Blob | File;
  /**
   * URL of the source video
   */
  source_video_url: string | Blob | File;
};

export type MusicCompositionPlan = {
  /**
   * The styles that should not be present in the entire song.
   */
  negative_global_styles: Array<string>;
  /**
   * The styles that should be present in the entire song.
   */
  positive_global_styles: Array<string>;
  /**
   * The sections of the song.
   */
  sections: Array<MusicSection>;
};

export type MusicGeneratorInput = {
  /**
   * The duration of the generated music in seconds.
   */
  duration: number;
  /**
   * The prompt to generate music from.
   */
  prompt: string;
};

export type MusicSection = {
  /**
   * The duration of the section in milliseconds. Must be between 3000ms and 120000ms.
   */
  duration_ms: number;
  /**
   * The lyrics of the section. Each line must be at most 200 characters long.
   */
  lines: Array<string>;
  /**
   * The styles that should not be present in this section.
   */
  negative_local_styles: Array<string>;
  /**
   * The styles that should be present in this section.
   */
  positive_local_styles: Array<string>;
  /**
   * The name of the section. Must be between 1 and 100 characters.
*/
  section_name: string;
};

export type NafnetInput = {
  /**
   * URL of image to be used for relighting
   */
  image_url: string | Blob | File;
  /**
   * seed to be used for generation
   */
  seed?: number;
};

export type NanoBanana2EditInput = {
  /**
   * The aspect ratio of the generated image. Supports extreme ratios: 4:1, 1:4, 8:1, 1:8. Default value: `"auto"`
   */
  aspect_ratio?:
    | "auto"
    | "21:9" | "16:9" | "3:2" | "4:3" | "5:4"
    | "1:1"
    | "4:5" | "3:4" | "2:3" | "9:16"
    | "4:1" | "1:4" | "8:1" | "1:8";
  /**
   * Enable web search for the image generation task. This will allow the model to use the latest information from the web to generate the image.
   */
  enable_web_search?: boolean;
  /**
   * The URLs of the images to use for image-to-image generation or image editing.
   * NOTE(review): element type reconstructed to match this file's single-URL fields; confirm against the API schema.
   */
  image_urls: Array<string | Blob | File>;
  /**
   * Experimental parameter to limit the number of generations from each round of prompting to 1. Set to `True` to disregard any instructions in the prompt regarding the number of images to generate and ignore any intermediate images generated by the model. This may affect generation quality. Default value: `true`
   */
  limit_generations?: boolean;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt for image editing.
   */
  prompt: string;
  /**
   * The resolution of the image to generate. Default value: `"1K"`
   */
  resolution?: "0.5K" | "1K" | "2K" | "4K";
  /**
   * The safety tolerance level for content moderation. 1 is the most strict (blocks most content), 6 is the least strict. Default value: `"4"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /**
   * The seed for the random number generator.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
  /**
   * When set, enables model thinking with the given level ('minimal' or 'high') and includes thoughts in the generation. Omit to disable.
   */
  thinking_level?: "minimal" | "high";
};

export type NanoBanana2Input = {
  /**
   * The aspect ratio of the generated image. Supports extreme ratios: 4:1, 1:4, 8:1, 1:8. Use "auto" to let the model decide based on the prompt. Default value: `"auto"`
   */
  aspect_ratio?:
    | "auto"
    | "21:9" | "16:9" | "3:2" | "4:3" | "5:4"
    | "1:1"
    | "4:5" | "3:4" | "2:3" | "9:16"
    | "4:1" | "1:4" | "8:1" | "1:8";
  /**
   * Enable web search for the image generation task. This will allow the model to use the latest information from the web to generate the image.
   */
  enable_web_search?: boolean;
  /**
   * Experimental parameter to limit the number of generations from each round of prompting to 1. Set to `True` to disregard any instructions in the prompt regarding the number of images to generate and ignore any intermediate images generated by the model. This may affect generation quality. Default value: `true`
   */
  limit_generations?: boolean;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The text prompt to generate an image from.
   */
  prompt: string;
  /**
   * The resolution of the image to generate. Default value: `"1K"`
   */
  resolution?: "0.5K" | "1K" | "2K" | "4K";
  /**
   * The safety tolerance level for content moderation. 1 is the most strict (blocks most content), 6 is the least strict. Default value: `"4"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /**
   * The seed for the random number generator.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
  /**
   * When set, enables model thinking with the given level ('minimal' or 'high') and includes thoughts in the generation.
Omit to disable. */
  thinking_level?: "minimal" | "high";
};

export type NanoBananaEditInput = {
  /**
   * The aspect ratio of the generated image. Default value: `"auto"`
   */
  aspect_ratio?:
    | "auto"
    | "21:9" | "16:9" | "3:2" | "4:3" | "5:4"
    | "1:1"
    | "4:5" | "3:4" | "2:3" | "9:16";
  /**
   * The URLs of the images to use for image-to-image generation or image editing.
   * NOTE(review): element type reconstructed to match this file's single-URL fields; confirm against the API schema.
   */
  image_urls: Array<string | Blob | File>;
  /**
   * Experimental parameter to limit the number of generations from each round of prompting to 1. Set to `True` to disregard any instructions in the prompt regarding the number of images to generate.
   */
  limit_generations?: boolean;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt for image editing.
   */
  prompt: string;
  /**
   * The safety tolerance level for content moderation. 1 is the most strict (blocks most content), 6 is the least strict. Default value: `"4"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /**
   * The seed for the random number generator.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

export type NanoBananaInput = {
  /**
   * The aspect ratio of the generated image. Default value: `"1:1"`
   */
  aspect_ratio?:
    | "21:9" | "16:9" | "3:2" | "4:3" | "5:4"
    | "1:1"
    | "4:5" | "3:4" | "2:3" | "9:16";
  /**
   * Experimental parameter to limit the number of generations from each round of prompting to 1. Set to `True` to disregard any instructions in the prompt regarding the number of images to generate.
   */
  limit_generations?: boolean;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The text prompt to generate an image from.
*/
  prompt: string;
  /**
   * The safety tolerance level for content moderation. 1 is the most strict (blocks most content), 6 is the least strict. Default value: `"4"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /**
   * The seed for the random number generator.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/** Both fields are required. */
export type NanoBananaOutput = {
  /**
   * The description of the generated images.
   */
  description: string;
  /**
   * The generated images.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * `unknown` is a placeholder until the real image type is confirmed.
   */
  images: Array<unknown>;
};

/** Required: `image_urls` and `prompt`; every other field is optional. */
export type NanoBananaProEditInput = {
  /**
   * The aspect ratio of the generated image. Default value: `auto`
   */
  aspect_ratio?: "auto" | "21:9" | "16:9" | "3:2" | "4:3" | "5:4" | "1:1" | "4:5" | "3:4" | "2:3" | "9:16";
  /**
   * Enable web search for the image generation task. This will allow the model to use the latest information from the web to generate the image.
   */
  enable_web_search?: boolean;
  /**
   * The URLs of the images to use for image-to-image generation or image editing.
   */
  image_urls: Array<string>;
  /**
   * Experimental parameter to limit the number of generations from each round of prompting to 1. Set to `True` to disregard any instructions in the prompt regarding the number of images to generate.
   */
  limit_generations?: boolean;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt for image editing.
   */
  prompt: string;
  /**
   * The resolution of the image to generate. Default value: `"1K"`
   */
  resolution?: "1K" | "2K" | "4K";
  /**
   * The safety tolerance level for content moderation. 1 is the most strict (blocks most content), 6 is the least strict. Default value: `"4"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /**
   * The seed for the random number generator.
*/
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/** Required: `prompt`; every other field is optional. */
export type NanoBananaProInput = {
  /**
   * The aspect ratio of the generated image. Default value: `1:1`
   */
  aspect_ratio?: "auto" | "21:9" | "16:9" | "3:2" | "4:3" | "5:4" | "1:1" | "4:5" | "3:4" | "2:3" | "9:16";
  /**
   * Enable web search for the image generation task. This will allow the model to use the latest information from the web to generate the image.
   */
  enable_web_search?: boolean;
  /**
   * Experimental parameter to limit the number of generations from each round of prompting to 1. Set to `True` to disregard any instructions in the prompt regarding the number of images to generate.
   */
  limit_generations?: boolean;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The text prompt to generate an image from.
   */
  prompt: string;
  /**
   * The resolution of the image to generate. Default value: `"1K"`
   */
  resolution?: "1K" | "2K" | "4K";
  /**
   * The safety tolerance level for content moderation. 1 is the most strict (blocks most content), 6 is the least strict. Default value: `"4"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /**
   * The seed for the random number generator.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/** Required: `audio_url`; `acceleration` is optional. */
export type NemotronAsrInput = {
  /**
   * Controls the speed/accuracy trade-off. 'none' = best accuracy (1.12s chunks, ~7.16% WER), 'low' = balanced (0.56s chunks, ~7.22% WER), 'medium' = faster (0.16s chunks, ~7.84% WER), 'high' = fastest (0.08s chunks, ~8.53% WER). Default value: `"none"`
   */
  acceleration?: "none" | "low" | "medium" | "high";
  /**
   * URL of the audio file.
*/
  audio_url: string | Blob | File;
};

/** Required: `image_urls`; every other field is optional. */
export type NextSceneInput = {
  /**
   * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"regular"`
   */
  acceleration?: "none" | "regular";
  /**
   * Whether to enable the safety checker for the generated image. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale. Controls how closely the model follows the prompt. Default value: `1`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. If not provided, the size of the final input image will be used.
   */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /**
   * The URL of the image to create the next scene from.
   */
  image_urls: Array<string>;
  /**
   * The scale factor for the LoRA model. Controls the strength of the LoRA effect. Default value: `1`
   */
  lora_scale?: number;
  /**
   * The negative prompt for the generation Default value: `" "`
   */
  negative_prompt?: string;
  /**
   * Number of images to generate Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `6`
   */
  num_inference_steps?: number;
  /**
   * The format of the output image Default value: `"png"`
   */
  output_format?: "png" | "jpeg" | "webp";
  /**
   * Describe the camera movement, framing change, or scene transition. Start with 'Next Scene:' for best results. Examples: camera movements (dolly, push-in, pull-back), framing changes (wide to close-up), new elements entering frame. Default value: `"Next Scene: The camera moves forward revealing more of the scene"`
   */
  prompt?: string;
  /**
   * Random seed for reproducibility. Same seed with same prompt will produce same result.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and won't be saved in history.
   */
  sync_mode?: boolean;
};

/** All three fields (`image_url`, `negative_prompt`, `prompt`) are required. */
export type Nextstep1Input = {
  /**
   * The URL of the image to edit.
*/
  image_url: string | Blob | File;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   */
  negative_prompt: string;
  /**
   * The prompt to edit the image.
   */
  prompt: string;
};

/** Required: `audio_url`; every other field is optional. */
export type NovaSrInput = {
  /**
   * The format for the output audio. Default value: `"mp3"`
   */
  audio_format?: "mp3" | "aac" | "m4a" | "ogg" | "opus" | "flac" | "wav";
  /**
   * The URL of the audio file to enhance.
   */
  audio_url: string | Blob | File;
  /**
   * The bitrate of the output audio. Default value: `"192k"`
   */
  bitrate?: string;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/** Both fields are required. */
export type NovaSrOutput = {
  /**
   * The enhanced audio file.
   */
  audio: AudioFile;
  /**
   * Timings for each step in the pipeline.
   */
  timings: NovaSRTimings;
};

/** Required: `prompt` and `video_url`; every other field is optional. */
export type O3ProEditVideoV2VInput = {
  /**
   * Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * `unknown` is a placeholder until the real element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.
   *
   * NOTE(review): presumably image URLs, matching the other `image_urls` fields — confirm.
   */
  image_urls?: Array<string>;
  /**
   * Whether to keep the original audio from the reference video. Default value: `true`
   */
  keep_audio?: boolean;
  /**
   * Text prompt for video generation. Reference video as @Video1.
   */
  prompt: string;
  /**
   * The type of multi-shot video generation. Default value: `"customize"`
   */
  shot_type?: string;
  /**
   * Reference video URL. Only .mp4/.mov formats, 3-10s duration, 720-2160px resolution, max 200MB.
   */
  video_url: string | Blob | File;
};

/** Required: `image_url`; every other field is optional. */
export type O3ProImageToVideoInput = {
  /**
   * Video duration in seconds (3-15s). Default value: `"5"`
   */
  duration?: "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12" | "13" | "14" | "15";
  /**
   * URL of the end frame image (optional).
*/
  end_image_url?: string | Blob | File;
  /**
   * Whether to generate native audio for the video.
   */
  generate_audio?: boolean;
  /**
   * URL of the start frame image.
   */
  image_url: string | Blob | File;
  /**
   * List of prompts for multi-shot video generation.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * `unknown` is a placeholder until the real prompt-element type is confirmed.
   */
  multi_prompt?: Array<unknown>;
  /**
   * Text prompt for video generation. Either prompt or multi_prompt must be provided, but not both.
   */
  prompt?: string;
  /**
   * The type of multi-shot video generation. Default value: `"customize"`
   */
  shot_type?: string;
};

/** Every field is optional at the type level. */
export type O3ProReferenceVideoI2VInput = {
  /**
   * The aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * Video duration in seconds (3-15s). Default value: `"5"`
   */
  duration?: "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12" | "13" | "14" | "15";
  /**
   * Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * `unknown` is a placeholder until the real element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * Image to use as the last frame of the video.
   */
  end_image_url?: string | Blob | File;
  /**
   * Whether to generate native audio for the video.
   */
  generate_audio?: boolean;
  /**
   * Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.
   *
   * NOTE(review): presumably image URLs, matching the other `image_urls` fields — confirm.
   */
  image_urls?: Array<string>;
  /**
   * List of prompts for multi-shot video generation.
   *
   * NOTE(review): element type was missing; `unknown` is a placeholder — confirm.
   */
  multi_prompt?: Array<unknown>;
  /**
   * Text prompt for video generation. Either prompt or multi_prompt must be provided, but not both.
   */
  prompt?: string;
  /**
   * The type of multi-shot video generation. Default value: `"customize"`
   */
  shot_type?: string;
  /**
   * Image to use as the first frame of the video.
   */
  start_image_url?: string | Blob | File;
};

/** Required: `prompt` and `video_url`; every other field is optional. */
export type O3ProReferenceVideoV2VInput = {
  /**
   * Aspect ratio. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "16:9" | "9:16" | "1:1";
  /**
   * Video duration in seconds (3-15s for reference video).
*/
  duration?: "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12" | "13" | "14" | "15";
  /**
   * Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * `unknown` is a placeholder until the real element type is confirmed.
   */
  elements?: Array<unknown>;
  /**
   * Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.
   *
   * NOTE(review): presumably image URLs, matching the other `image_urls` fields — confirm.
   */
  image_urls?: Array<string>;
  /**
   * Whether to keep the original audio from the reference video. Default value: `true`
   */
  keep_audio?: boolean;
  /**
   * Text prompt for video generation. Reference video as @Video1.
   */
  prompt: string;
  /**
   * The type of multi-shot video generation. Default value: `"customize"`
   */
  shot_type?: string;
  /**
   * Reference video URL. Only .mp4/.mov formats, 3-10s duration, 720-2160px resolution, max 200MB.
   */
  video_url: string | Blob | File;
};

/** Every field is optional at the type level. */
export type O3ProTextToVideoInput = {
  /**
   * Aspect ratio of the generated video. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * Video duration in seconds (3-15s). Default value: `"5"`
   */
  duration?: "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12" | "13" | "14" | "15";
  /**
   * Whether to generate native audio for the video.
   */
  generate_audio?: boolean;
  /**
   * List of prompts for multi-shot video generation.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * `unknown` is a placeholder until the real prompt-element type is confirmed.
   */
  multi_prompt?: Array<unknown>;
  /**
   * Text prompt for video generation. Required unless multi_prompt is provided.
   */
  prompt?: string;
  /**
   * The type of multi-shot video generation. Default value: `"customize"`
   */
  shot_type?: string;
};

/** All four boundaries are required numbers. */
export type ObjectBoundingBox = {
  /**
   * Right boundary of detection box in normalized format (0 to 1)
   */
  x_max: number;
  /**
   * Left boundary of detection box in normalized format (0 to 1)
   */
  x_min: number;
  /**
   * Bottom boundary of detection box in normalized format (0 to 1)
   */
  y_max: number;
  /**
   * Top boundary of detection box in normalized format (0 to 1)
   */
  y_min: number;
};

/** All fields are required. */
export type ObjectOutput = {
  /**
   * Generated 3D object file.
*/
  model_mesh: File;
  /**
   * Seed value used for generation.
   */
  seed: number;
  /**
   * Generated textures for the 3D object.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * `unknown` is a placeholder until the real texture type is confirmed.
   */
  textures: Array<unknown>;
};

/** The single field `quad_boxes` is required. */
export type OCRBoundingBox = {
  /**
   * List of quadrilateral boxes
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * `unknown` is a placeholder until the real box type is confirmed.
   */
  quad_boxes: Array<unknown>;
};

/** Required: `prompt`; every other field is optional. */
export type OmnigenV1Input = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `3`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. Default value: `square_hd`
   */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /**
   * The Image Guidance scale is a measure of how close you want
   * the model to stick to your input image when looking for a related image to show you. Default value: `1.6`
   */
  img_guidance_scale?: number;
  /**
   * URL of images to use while generating the image, Use <|image_1|> for the first image and so on.
   */
  input_image_urls?: Array<string>;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `50`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/** Required: `prompt`; every other field is optional. */
export type OmnigenV2Input = {
  /**
   * CFG range end value. Default value: `1`
   */
  cfg_range_end?: number;
  /**
   * CFG range start value.
   */
  cfg_range_start?: number;
  /**
   * If set to true, the safety checker will be enabled.
Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The Image Guidance scale controls how closely the model follows the input images.
   * For image editing: 1.3-2.0, for in-context generation: 2.0-3.0 Default value: `2`
   */
  image_guidance_scale?: number;
  /**
   * The size of the generated image. Default value: `square_hd`
   */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /**
   * URLs of input images to use for image editing or multi-image generation. Support up to 3 images.
   * (The element type was missing in the declaration; `string` follows from "URLs" above.)
   */
  input_image_urls?: Array<string>;
  /**
   * Negative prompt to guide what should not be in the image. Default value: `"(((deformed))), blurry, over saturation, bad anatomy, disfigured, poorly drawn face, mutation, mutated, (extra_limb), (ugly), (poorly drawn hands), fused fingers, messy drawing, broken legs censor, censored, censor_bar"`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `50`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The prompt to generate or edit an image. Use specific language like 'Add the bird from image 1 to the desk in image 2' for better results.
   */
  prompt: string;
  /**
   * The scheduler to use for the diffusion process. Default value: `"euler"`
   */
  scheduler?: "euler" | "dpmsolver";
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
  /**
   * The Text Guidance scale controls how closely the model follows the text prompt.
   * Higher values make the model stick more closely to the prompt.
Default value: `5` */
  text_guidance_scale?: number;
};

/** Both fields are required. */
export type OmniHumanInput = {
  /**
   * The URL of the audio file to generate the video. Audio must be under 30s long.
   */
  audio_url: string | Blob | File;
  /**
   * The URL of the image used to generate the video
   */
  image_url: string | Blob | File;
};

/** Required: `audio_url` and `image_url`; every other field is optional. */
export type OmniHumanv15Input = {
  /**
   * The URL of the audio file to generate the video. Audio must be under 30s long for 1080p generation and under 60s long for 720p generation.
   */
  audio_url: string | Blob | File;
  /**
   * The URL of the image used to generate the video
   */
  image_url: string | Blob | File;
  /**
   * The text prompt used to guide the video generation.
   */
  prompt?: string;
  /**
   * The resolution of the generated video. Defaults to 1080p. 720p generation is faster and higher in quality. 1080p generation is limited to 30s audio and 720p generation is limited to 60s audio. Default value: `"1080p"`
   */
  resolution?: "720p" | "1080p";
  /**
   * Generate a video at a faster rate with a slight quality trade-off.
   */
  turbo_mode?: boolean;
};

/** Required: `prompt`; every other field is optional. */
export type omnilottieInput = {
  /**
   * Maximum number of Lottie tokens to generate. Default value: `4096`
   */
  max_tokens?: number;
  /**
   * Text description of the Lottie animation to generate.
   */
  prompt: string;
  /**
   * Sampling temperature for generation. Default value: `0.9`
   */
  temperature?: number;
  /**
   * Top-k sampling parameter. Default value: `5`
   */
  top_k?: number;
  /**
   * Nucleus sampling probability threshold. Default value: `0.25`
   */
  top_p?: number;
};

/** The single field `lottie_file` is required. */
export type omnilottieOutput = {
  /**
   * The generated Lottie animation as a JSON file.
   */
  lottie_file: File;
};

/** Required: `input_image_url`; every other field is optional. */
export type omnipartInput = {
  /**
   * Guidance scale for the model. Default value: `7.5`
   */
  guidance_scale?: number;
  /**
   * URL of image to use while generating the 3D model.
   */
  input_image_url: string | Blob | File;
  /**
   * Minimum segment size (pixels) for the model.
Default value: `2000` */
  minimum_segment_size?: number;
  /**
   * Specify which segments to merge (e.g., '0,1;3,4' merges segments 0&1 together and 3&4 together) Default value: `""`
   */
  parts?: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time. Default value: `765464`
   */
  seed?: number;
};

/** All fields are required. */
export type omnipartOutput = {
  /**
   * Generated 3D object file.
   */
  full_model_mesh: File;
  /**
   * Generated 3D object file.
   */
  model_mesh: File;
  /**
   * All outputs file.
   */
  output_zip: File;
  /**
   * Seed value used for generation.
   */
  seed: number;
};

/** Required: `prompt` and `video_url`; every other field is optional. */
export type OmniV2VEditInput = {
  /**
   * Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2, etc. Maximum 4 total (elements + reference images) when using video.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * presumed to be `OmniVideoElementInput`, declared in this file — confirm.
   */
  elements?: Array<OmniVideoElementInput>;
  /**
   * Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.
   *
   * NOTE(review): presumably image URLs, matching the other `image_urls` fields — confirm.
   */
  image_urls?: Array<string>;
  /**
   * Whether to keep the original audio from the video.
   */
  keep_audio?: boolean;
  /**
   * Use @Element1, @Element2 to reference elements and @Image1, @Image2 to reference images in order.
   */
  prompt: string;
  /**
   * Reference video URL. Only .mp4/.mov formats supported, 3-10 seconds duration, 720-2160px resolution, max 200MB.
   *
   * Max file size: 200.0MB, Min width: 720px, Min height: 720px, Max width: 2160px, Max height: 2160px, Min duration: 3.0s, Max duration: 10.05s, Min FPS: 24.0, Max FPS: 60.0, Timeout: 30.0s
   */
  video_url: string | Blob | File;
};

/** Required: `prompt` and `video_url`; every other field is optional. */
export type OmniV2VReferenceInput = {
  /**
   * The aspect ratio of the generated video frame. If 'auto', the aspect ratio will be determined automatically based on the input video, and the closest aspect ratio to the input video will be used. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "16:9" | "9:16" | "1:1";
  /**
   * Video duration in seconds.
Default value: `"5"` */
  duration?: "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10";
  /**
   * Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2, etc. Maximum 4 total (elements + reference images) when using video.
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * presumed to be `OmniVideoElementInput`, declared in this file — confirm.
   */
  elements?: Array<OmniVideoElementInput>;
  /**
   * Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.
   *
   * NOTE(review): presumably image URLs, matching the other `image_urls` fields — confirm.
   */
  image_urls?: Array<string>;
  /**
   * Whether to keep the original audio from the video.
   */
  keep_audio?: boolean;
  /**
   * Use @Element1, @Element2 to reference elements and @Image1, @Image2 to reference images in order.
   */
  prompt: string;
  /**
   * Reference video URL. Only .mp4/.mov formats supported, 3-10 seconds duration, 720-2160px resolution, max 200MB.
   *
   * Max file size: 200.0MB, Min width: 720px, Min height: 720px, Max width: 2160px, Max height: 2160px, Min duration: 3.0s, Max duration: 10.05s, Min FPS: 24.0, Max FPS: 60.0, Timeout: 30.0s
   */
  video_url: string | Blob | File;
};

/** Required: `frontal_image_url`; `reference_image_urls` is optional at the type level. */
export type OmniVideoElementInput = {
  /**
   * The frontal image of the element (main view).
   *
   * Max file size: 10.0MB, Min width: 300px, Min height: 300px, Min aspect ratio: 0.40, Max aspect ratio: 2.50, Timeout: 20.0s
   */
  frontal_image_url: string | Blob | File;
  /**
   * Additional reference images from different angles. 1-3 images supported. At least one image is required.
   *
   * NOTE(review): the element type was missing; `string` is presumed from the `_urls` name — confirm.
   */
  reference_image_urls?: Array<string>;
};

/** Required: `prompt` and `start_image_url`; every other field is optional. */
export type OmniVideoImageToVideoInput = {
  /**
   * Video duration in seconds. Default value: `"5"`
   */
  duration?: "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10";
  /**
   * Image to use as the last frame of the video.
   */
  end_image_url?: string | Blob | File;
  /**
   * Use @Image1 to reference the start frame, @Image2 to reference the end frame.
   */
  prompt: string;
  /**
   * Image to use as the first frame of the video.
* * Max file size: 10.0MB, Min width: 300px, Min height: 300px, Min aspect ratio: 0.40, Max aspect ratio: 2.50, Timeout: 20.0s */
  start_image_url: string | Blob | File;
};

/** Required: `prompt`; every other field is optional. */
export type OmniVideoReferenceToVideoInput = {
  /**
   * The aspect ratio of the generated video frame. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1";
  /**
   * Video duration in seconds. Default value: `"5"`
   */
  duration?: "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10";
  /**
   * Elements (characters/objects) to include in the video. Reference in prompt as @Element1, @Element2, etc. Maximum 7 total (elements + reference images + start image).
   *
   * NOTE(review): the element type was missing (a bare `Array` does not type-check);
   * presumed to be `OmniVideoElementInput`, declared in this file — confirm.
   */
  elements?: Array<OmniVideoElementInput>;
  /**
   * Additional reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 7 total (elements + reference images + start image).
   *
   * NOTE(review): presumably image URLs, matching the other `image_urls` fields — confirm.
   */
  image_urls?: Array<string>;
  /**
   * Take @Element1, @Element2 to reference elements and @Image1, @Image2 to reference images in order.
   */
  prompt: string;
};

/** Required: `composition_image_url`, `identity_image_url`, `image_url`, `prompt`, `style_image_url`; the rest are optional. */
export type OmniZeroInput = {
  /**
   * Composition image url.
   */
  composition_image_url: string | Blob | File;
  /**
   * Composition strength. Default value: `1`
   */
  composition_strength?: number;
  /**
   * Depth strength. Default value: `0.5`
   */
  depth_strength?: number;
  /**
   * Face strength. Default value: `1`
   */
  face_strength?: number;
  /**
   * Guidance scale. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * Identity image url.
   */
  identity_image_url: string | Blob | File;
  /**
   * Identity strength. Default value: `1`
   */
  identity_strength?: number;
  /**
   * Image strength. Default value: `0.75`
   */
  image_strength?: number;
  /**
   * Input image url.
   */
  image_url: string | Blob | File;
  /**
   * Negative prompt to guide the image generation. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Number of images. Default value: `1`
   */
  number_of_images?: number;
  /**
   * Prompt to guide the image generation.
   */
  prompt: string;
  /**
   * Seed. Default value: `42`
   */
  seed?: number;
  /**
   * Style image url.
*/
  style_image_url: string | Blob | File;
  /**
   * Style strength. Default value: `1`
   */
  style_strength?: number;
};

/** Required: `image_url` and `mask_url`; every other field is optional. */
export type onerewardInput = {
  /**
   * Acceleration level for image generation. Default value: `"regular"`
   */
  acceleration?: "none" | "regular" | "high";
  /**
   * If True, runs a safety checker on the output and filters NSFW content. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Guidance scale for the base diffusion process. The model uses true_cfg for classifier-free guidance; this controls the distillation guidance. Default value: `1`
   */
  guidance_scale?: number;
  /**
   * URL of the source image to edit.
   */
  image_url: string | Blob | File;
  /**
   * URL of the mask image. White pixels indicate the area to fill or modify; black pixels preserve the original content.
   */
  mask_url: string | Blob | File;
  /**
   * Text describing what to avoid in the generated output. Default value: `"nsfw"`
   */
  negative_prompt?: string;
  /**
   * Number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * Number of denoising steps. More steps generally produce higher quality results. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * Format of the output image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * Text description of what to generate in the masked area.
   */
  prompt?: string;
  /**
   * Random seed for reproducibility. If None, a random seed is used.
   */
  seed?: number;
  /**
   * If True, returns the image as a data URI instead of uploading to CDN. The image will not be available in the request history.
   */
  sync_mode?: boolean;
  /**
   * True classifier-free guidance scale. Controls how strongly the model follows the prompt. Values above 1.0 enable CFG. Default value: `4`
   */
  true_cfg?: number;
};

/** Required: `text`; every other field is optional. */
export type OrpheusTtsInput = {
  /**
   * Repetition penalty (>= 1.1 required for stable generations). Default value: `1.2`
   */
  repetition_penalty?: number;
  /**
   * Temperature for generation (higher = more creative).
Default value: `0.7` */
  temperature?: number;
  /**
   * The text to be converted to speech. You can additionally add the following emotive tags: `<laugh>`, `<chuckle>`, `<sigh>`, `<cough>`, `<sniffle>`, `<groan>`, `<yawn>`, `<gasp>`
   *
   * NOTE(review): the eight tag names were blank in this comment (apparently stripped as markup);
   * they are restored from the Orpheus TTS documentation — confirm against the model docs.
   */
  text: string;
  /**
   * Voice ID for the desired voice. Default value: `"tara"`
   */
  voice?: "tara" | "leah" | "jess" | "leo" | "dan" | "mia" | "zac" | "zoe";
};

/** Required: `image_url`; every other field is optional. */
export type OutpaintInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Number of pixels to add as black margin on the bottom side (0-700). Default value: `400`
   */
  expand_bottom?: number;
  /**
   * Number of pixels to add as black margin on the left side (0-700).
   */
  expand_left?: number;
  /**
   * Number of pixels to add as black margin on the right side (0-700).
   */
  expand_right?: number;
  /**
   * Number of pixels to add as black margin on the top side (0-700).
   */
  expand_top?: number;
  /**
   * Image URL to outpaint
   */
  image_url: string | Blob | File;
  /**
   * Number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the output image. Default value: `"png"`
   */
  output_format?: "png" | "jpeg" | "jpg" | "webp";
  /**
   * Optional prompt to guide the outpainting. If provided, it will be appended to the base outpaint instruction. Example: 'with a beautiful sunset in the background' Default value: `""`
   */
  prompt?: string;
  /**
   * If True, the function will wait for the image to be generated and uploaded before returning the response. If False, the function will return immediately and the image will be generated asynchronously.
   */
  sync_mode?: boolean;
  /**
   * Percentage to zoom out the image. If set, the image will be scaled down by this percentage and black margins will be added to maintain original size. Example: 50 means the image will be 50% of original size with black margins filling the rest. Default value: `20`
   */
  zoom_out_percentage?: number;
};

/** Both fields are required. */
export type Output = {
  /**
   * The prompt used to generate the audio.
   */
  prompt: string;
  /**
   * The generated video with audio.
*/
  video: File;
};

/** Every field is optional. */
export type OutputFormat = {
  /**
   * Bit rate in bps. Only applicable for MP3 codec. Defaults to 128000 for MP3.
   */
  bit_rate?: "32000" | "64000" | "96000" | "128000" | "192000";
  /**
   * Audio codec. Supported: mp3, wav, pcm, mulaw, alaw. Default value: `"mp3"`
   */
  codec?: "mp3" | "wav" | "pcm" | "mulaw" | "alaw";
  /**
   * Sample rate in Hz. Default value: `"24000"`
   */
  sample_rate?: "8000" | "16000" | "22050" | "24000" | "44100" | "48000";
};

/** Required: `background_image_url` and `overlay_image_url`; every other field is optional. */
export type OverlayImageInput = {
  /**
   * The URL of the background image
   */
  background_image_url: string | Blob | File;
  /**
   * Opacity of overlay image (0.0-1.0) Default value: `1`
   */
  opacity?: number;
  /**
   * Output format for the result image Default value: `"png"`
   */
  output_format?: "png" | "jpg" | "jpeg" | "webp";
  /**
   * The URL of the overlay image
   */
  overlay_image_url: string | Blob | File;
  /**
   * Scale of overlay image as percentage (25-200) Default value: `100`
   */
  scale_percent?: number;
  /**
   * Color of stroke/border Default value: `"black"`
   */
  stroke_color?: "black" | "white" | "red" | "green" | "blue" | "yellow" | "orange" | "purple" | "pink" | "brown" | "gray" | "cyan" | "magenta";
  /**
   * Width of stroke/border around overlay in pixels (0 for no stroke)
   */
  stroke_width?: number;
  /**
   * X position of overlay center as percentage of background width (0-100) Default value: `50`
   */
  x_percent?: number;
  /**
   * Y position of overlay center as percentage of background height (0-100) Default value: `50`
   */
  y_percent?: number;
};

/** Required: `main_video_url` and `overlay_video_url`; every other field is optional. */
export type OverlayVideoInput = {
  /**
   * Which audio source to use. 'main' uses only the main video's audio, 'overlay' uses only the overlay video's audio, 'both' mixes audio from both videos together. When set, overrides include_overlay_audio.
   */
  audio_source?: "main" | "overlay" | "both";
  /**
   * Blend mode for compositing the overlay video.
'normal' places overlay on top, 'screen' makes dark areas transparent (good for overlays with black backgrounds), 'multiply' makes light areas transparent (good for overlays with white backgrounds). Default value: `"normal"` */
  blend_mode?: "normal" | "screen" | "multiply";
  /**
   * Blend amount for chroma key edges (0.0-1.0). Lower values create harder edges, higher values create softer, more gradual transparency. Default value: `0.1`
   */
  chroma_key_blend?: number;
  /**
   * Hex color to key out (remove) from overlay video. Use for green screen removal. Examples: '00FF00' for green, '0000FF' for blue, '000000' for black. Only used when enable_chroma_key is True.
   */
  chroma_key_color?: string;
  /**
   * Similarity threshold for chroma key color matching (0.01-1.0). Lower values match colors more precisely, higher values match a wider range of similar colors. Default value: `0.3`
   */
  chroma_key_similarity?: number;
  /**
   * Enable chroma key (color removal) on the overlay video. When disabled, all chroma key settings are ignored.
   */
  enable_chroma_key?: boolean;
  /**
   * Include audio from overlay video. When enabled, audio from both videos will be mixed together. When disabled, only main video audio is used. Ignored when audio_source is set.
*/
  include_overlay_audio?: boolean;
  /**
   * URL of the main/background video
   */
  main_video_url: string | Blob | File;
  /**
   * Opacity of overlay video (0.0-1.0) Default value: `1`
   */
  opacity?: number;
  /**
   * URL of the overlay video to place on top
   */
  overlay_video_url: string | Blob | File;
  /**
   * Scale of overlay video as percentage (10-200) Default value: `35`
   */
  scale_percent?: number;
  /**
   * End output when the shortest input ends Default value: `true`
   */
  shortest?: boolean;
  /**
   * X position of overlay center as percentage of background width (0-100) Default value: `14`
   */
  x_percent?: number;
  /**
   * Y position of overlay center as percentage of background height (0-100) Default value: `13`
   */
  y_percent?: number;
};

/** Required: `prompt`; every other field is optional. */
export type OvisImageInput = {
  /**
   * The acceleration level to use. Default value: `"regular"`
   */
  acceleration?: "none" | "regular" | "high";
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The guidance scale to use for the image generation. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. Default value: `landscape_4_3`
   */
  image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9";
  /**
   * The negative prompt to generate an image from. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"png"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
};

/**
 * Input: an image to process together with the parabolize coefficient and
 * the vertex position.
 */
export type ParabolizeInput = {
  /**
   * URL of the image to process.
   */
  image_url: string | Blob | File;
  /**
   * Parabolize coefficient. Default value: `1`
   */
  parabolize_coeff?: number;
  /**
   * Vertex X position. Default value: `0.5`
   */
  vertex_x?: number;
  /**
   * Vertex Y position. Default value: `0.5`
   */
  vertex_y?: number;
};

/**
 * Input: the FBX file that should be split into parts.
 */
export type PartInput = {
  /**
   * URL of FBX file to split into parts. ONLY FBX format supported.
   * Max size: 100MB, face count ≤30,000. Recommended: AIGC-generated models.
   */
  input_file_url: string | Blob | File;
};

/**
 * Output: the generated part files.
 */
export type PartOutput = {
  /**
   * List of generated part files in FBX format.
   *
   * NOTE(review): the element type was missing here (a bare `Array`, which
   * TypeScript rejects). `File` is assumed because this file already uses
   * `File` for other output media — confirm against the API schema.
   */
  result_files: Array<File>;
};

/**
 * Input for super-resolving an image: conditioning/guidance scales, prompts,
 * upscaling factor and step count.
 */
export type pasdInput = {
  /**
   * ControlNet conditioning scale (0.1-1.0). Default value: `0.8`
   */
  conditioning_scale?: number;
  /**
   * Guidance scale for diffusion (1.0-20.0). Default value: `7`
   */
  guidance_scale?: number;
  /**
   * Input image to super-resolve.
   */
  image_url: string | Blob | File;
  /**
   * Negative prompt to avoid unwanted artifacts. Default value: `"blurry, dirty, messy, frames, deformed, dotted, noise, raster lines, unclear, lowres, over-smoothed, painting, ai generated"`
   */
  negative_prompt?: string;
  /**
   * Additional prompt to guide super-resolution. Default value: `""`
   */
  prompt?: string;
  /**
   * Upscaling factor (1-4x). Default value: `2`
   */
  scale?: number;
  /**
   * Number of inference steps (10-50). Default value: `25`
   */
  steps?: number;
};

/**
 * Output: the super-resolved images plus optional timing information.
 */
export type pasdOutput = {
  /**
   * The generated super-resolved images.
   *
   * NOTE(review): the element type was missing here (a bare `Array`, which
   * TypeScript rejects). `File` is assumed because this file already uses
   * `File` for other output media — confirm against the API schema.
   */
  images: Array<File>;
  /**
   * Timing information for different processing stages.
   */
  timings?: unknown;
};

export type personaplexInput = {
  /**
   * URL to the input audio file (user's speech).
   */
  audio_url: string | Blob | File;
  /**
   * Text prompt describing the AI persona and conversation context. Default value: `"You are a wise and friendly teacher. Answer questions or provide advice in a clear and engaging way."`
   */
  prompt?: string;
  /**
   * Random seed for reproducibility.
   */
  seed?: number;
  /**
   * Audio sampling temperature.
Higher values produce more diverse outputs. Default value: `0.8`
   */
  temperature_audio?: number;
  /**
   * Text sampling temperature. Higher values produce more diverse outputs.
   * Default value: `0.7`
   */
  temperature_text?: number;
  /**
   * Top-K sampling for audio tokens. Default value: `250`
   */
  top_k_audio?: number;
  /**
   * Top-K sampling for text tokens. Default value: `25`
   */
  top_k_text?: number;
  /**
   * Voice ID for the AI response. NAT = natural, VAR = variety. F = female,
   * M = male. Ignored when voice_audio_url is provided. Default value: `"NATF2"`
   */
  voice?:
    | "NATF0" | "NATF1" | "NATF2" | "NATF3"
    | "NATM0" | "NATM1" | "NATM2" | "NATM3"
    | "VARF0" | "VARF1" | "VARF2" | "VARF3" | "VARF4"
    | "VARM0" | "VARM1" | "VARM2" | "VARM3" | "VARM4";
  /**
   * URL to a voice sample audio for on-the-fly voice cloning. When provided,
   * the AI responds in the cloned voice instead of the preset 'voice'.
   * 10+ seconds of clear speech recommended. Billed at 2x rate.
   */
  voice_audio_url?: string | Blob | File;
};

/**
 * Generated AI response: the audio, its duration, the seed that was used and
 * the transcribed text.
 */
export type personaplexOutput = {
  /**
   * The generated AI response audio (WAV, 24kHz).
   */
  audio: File;
  /**
   * Duration of the generated audio in seconds.
   */
  duration: number;
  /**
   * The seed used for generation.
   */
  seed: number;
  /**
   * Transcribed text of the AI's response.
   */
  text: string;
};

export type PersonaplexRealtimeInput = {
  /**
   * Input audio chunk (PCM s16le, 24kHz mono). Base64-encoded in JSON transport.
   */
  audio: string;
  /**
   * Text prompt describing the AI persona and conversation context. Default value: `"You are a wise and friendly teacher. Answer questions or provide advice in a clear and engaging way."`
   */
  prompt?: string;
  /**
   * Random seed for reproducibility.
   */
  seed?: number;
  /**
   * Audio sampling temperature. Higher values produce more diverse outputs.
   * Default value: `0.8`
   */
  temperature_audio?: number;
  /**
   * Text sampling temperature. Higher values produce more diverse outputs.
   * Default value: `0.7`
   */
  temperature_text?: number;
  /**
   * Top-K sampling for audio tokens.
Default value: `250`
   */
  top_k_audio?: number;
  /**
   * Top-K sampling for text tokens. Default value: `25`
   */
  top_k_text?: number;
  /**
   * Voice ID for the AI response. NAT = natural, VAR = variety. F = female,
   * M = male. Ignored when voice_audio_url is provided. Default value: `"NATF2"`
   */
  voice?:
    | "NATF0" | "NATF1" | "NATF2" | "NATF3"
    | "NATM0" | "NATM1" | "NATM2" | "NATM3"
    | "VARF0" | "VARF1" | "VARF2" | "VARF3" | "VARF4"
    | "VARM0" | "VARM1" | "VARM2" | "VARM3" | "VARM4";
  /**
   * URL to a voice sample audio for on-the-fly voice cloning. When provided,
   * the AI responds in the cloned voice instead of the preset 'voice'.
   * 10+ seconds of clear speech recommended. Billed at 2x rate. Default value: `"null"`
   */
  voice_audio_url?: string | Blob | File;
};

/**
 * One generated chunk: audio plus the text tokens produced for that chunk.
 */
export type PersonaplexRealtimeOutput = {
  /**
   * Generated audio chunk (PCM s16le, 24kHz mono). Base64-encoded in JSON transport.
   */
  audio: string;
  /**
   * Generated text tokens for this chunk. Default value: `""`
   */
  text?: string;
};

/**
 * Input for changing the perspective of an image.
 */
export type PerspectiveInput = {
  /**
   * Aspect ratio for 4K output.
   */
  aspect_ratio?: AspectRatio;
  /**
   * Image URL for perspective change.
   */
  image_url: string | Blob | File;
  /**
   * Target perspective for the change. Default value: `"front"`
   */
  target_perspective?:
    | "front"
    | "left_side"
    | "right_side"
    | "back"
    | "top_down"
    | "bottom_up"
    | "birds_eye"
    | "three_quarter_left"
    | "three_quarter_right";
};

/**
 * Input: the archive of images a profile is created from.
 */
export type PhotaCreateProfileInput = {
  /**
   * URL to a ZIP archive containing the profile images.
   */
  image_data_url: string | Blob | File;
};

export type PhotaEditInput = {
  /**
   * Aspect ratio of the generated image. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "1:1" | "16:9" | "4:3" | "3:4" | "9:16";
  /**
   * List of URLs / Base64 data URIs of the images to edit. A maximum of 10
   * images are supported, additional images will be ignored.
   *
   * Typed as strings because both documented forms (URL, data URI) are
   * strings; the original declaration was a bare `Array`, which does not
   * compile.
   */
  image_urls?: Array<string>;
  /**
   * Number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image.
Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * List of profile IDs to use for the image generation. Profiles may be
   * tagged in the prompt as @Profile1, @Profile2, etc.
   *
   * Typed as strings to match `profile_id: string` on the profile output
   * types in this file; the original declaration was a bare `Array`, which
   * does not compile.
   */
  profile_ids?: Array<string>;
  /**
   * Text description of the desired image. To refer to specific profiles,
   * use [[profile_id_1]], [[profile_id_2]], etc.
   */
  prompt: string;
  /**
   * Resolution of the generated image. Default value: `"1K"`
   */
  resolution?: "1K" | "4K";
  /**
   * If `True`, the media will be returned as a data URI and the output data
   * won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Input for enhancing an image, optionally with profile IDs used as
 * candidates for identity preservation.
 */
export type PhotaEnhanceInput = {
  /**
   * URL/Base64 data URI of the image to enhance.
   */
  image_url: string | Blob | File;
  /**
   * Number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * List of profile IDs to use for the image enhancement. The profiles sent
   * over will be used as candidates for identity preservation.
   *
   * Typed as strings to match `profile_id: string` on the profile output
   * types in this file; the original declaration was a bare `Array`, which
   * does not compile.
   */
  profile_ids?: Array<string>;
  /**
   * If `True`, the media will be returned as a data URI and the output data
   * won't be available in the request history.
   */
  sync_mode?: boolean;
};

export type photaInput = {
  /**
   * Aspect ratio of the generated image. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "1:1" | "16:9" | "4:3" | "3:4" | "9:16";
  /**
   * Number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png" | "webp";
  /**
   * Text description of the desired image. In case you wish to use specific
   * profiles, refer to them as [[profile_id_1]], [[profile_id_2]], etc.
   */
  prompt: string;
  /**
   * Resolution of the generated image. Default value: `"1K"`
   */
  resolution?: "1K" | "4K";
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
};

/**
 * Output: the generated image(s).
 */
export type photaOutput = {
  /**
   * The URL of the generated image.
   *
   * NOTE(review): the element type was missing here (a bare `Array`, which
   * TypeScript rejects). The description mentions a URL, but whether each
   * entry is a plain string or an image/file object cannot be determined
   * from this file, so entries are typed `unknown` and must be narrowed by
   * the consumer — restore the precise type from the API schema.
   */
  images: Array<unknown>;
};

/**
 * Result of deleting a profile.
 */
export type PhotaProfileDeleteOutput = {
  /**
   * The deleted profile ID.
   */
  profile_id: string;
  /**
   * Whether the profile was deleted successfully.
   */
  success: boolean;
};

/**
 * Output carrying a profile identifier.
 */
export type PhotaProfileOutput = {
  /**
   * The Photalabs profile ID.
   */
  profile_id: string;
};

/**
 * Camera-related characteristics of the image to be generated.
 */
export type PhotographicCharacteristics = {
  /**
   * The angle of the camera in the image to be generated.
   */
  camera_angle?: string;
  /**
   * The depth of field in the image to be generated.
   */
  depth_of_field?: string;
  /**
   * The focus in the image to be generated.
   */
  focus?: string;
  /**
   * The focal length of the lens in the image to be generated.
   */
  lens_focal_length?: string;
};

/**
 * Input for applying a photography effect to an image.
 */
export type PhotographyEffectsInput = {
  /**
   * Aspect ratio for 4K output.
   */
  aspect_ratio?: AspectRatio;
  /**
   * Photography effect to apply. Default value: `"film"`
   */
  effect_type?:
    | "film"
    | "vintage_film"
    | "portrait_photography"
    | "fashion_photography"
    | "street_photography"
    | "sepia_tone"
    | "film_grain"
    | "light_leaks"
    | "vignette_effect"
    | "instant_camera"
    | "golden_hour"
    | "dramatic_lighting"
    | "soft_focus"
    | "bokeh_effect"
    | "high_contrast"
    | "double_exposure";
  /**
   * Image URL for photography effects.
   */
  image_url: string | Blob | File;
};

export type PhotoLoraI2IInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * URL of image to use for inpainting. or img2img
   */
  image_url: string | Blob | File;
  /**
   * The LoRAs to use for the image generation.
You can use any number of LoRAs
   * and they will be merged together to generate the final image.
   *
   * NOTE(review): the element type was missing here (a bare `Array`, which
   * TypeScript rejects). The LoRA entry shape is not visible in this part of
   * the file, so entries are typed `unknown` — restore the precise type from
   * the API schema.
   */
  loras?: Array<unknown>;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * LoRA scale of the photo lora model. Default value: `0.75`
   */
  photo_lora_scale?: number;
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * The strength to use for inpainting/image-to-image. Only used if the
   * image_url is provided. 1.0 completely remakes the image while 0.0
   * preserves the original. Default value: `0.85`
   */
  strength?: number;
  /**
   * If set to true, the function will wait for the image to be generated and uploaded
   * before returning the response. This will increase the latency of the function but
   * it allows you to get the image directly in the response without going through the CDN.
   */
  sync_mode?: boolean;
};

/**
 * Input for inpainting with the photo LoRA: source image, mask, prompt, and
 * generation settings.
 */
export type PhotoLoraInpaintInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * URL of image to use for inpainting or img2img.
   */
  image_url: string | Blob | File;
  /**
   * The LoRAs to use for the image generation. You can use any number of LoRAs
   * and they will be merged together to generate the final image.
   *
   * NOTE(review): the element type was missing here (a bare `Array`, which
   * TypeScript rejects). The LoRA entry shape is not visible in this part of
   * the file, so entries are typed `unknown` — restore the precise type from
   * the API schema.
   */
  loras?: Array<unknown>;
  /**
   * The mask of the area to inpaint.
   */
  mask_url: string | Blob | File;
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * LoRA scale of the photo lora model. Default value: `0.75`
   */
  photo_lora_scale?: number;
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * The strength to use for inpainting/image-to-image. Only used if the
   * image_url is provided. 1.0 completely remakes the image while 0.0
   * preserves the original. Default value: `0.85`
   */
  strength?: number;
  /**
   * If set to true, the function will wait for the image to be generated and uploaded
   * before returning the response. This will increase the latency of the function but
   * it allows you to get the image directly in the response without going through the CDN.
   */
  sync_mode?: boolean;
};

export type photomakerInput = {
  /**
   * The base pipeline to use for generating the image. Default value: `"photomaker"`
   */
  base_pipeline?: "photomaker" | "photomaker-style";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * The URL of the image archive containing the images you want to use.
   */
  image_archive_url: string | Blob | File;
  /**
   * How much noise to add to the latent image. 0 for no noise, 1 for maximum noise. Default value: `0.5`
   */
  initial_image_strength?: number;
  /**
   * Optional initial image for img2img
   */
  initial_image_url?: string | Blob | File;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image.
This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Number of images to generate in one request. Note that the higher the batch size,
   * the longer it will take to generate the images. Default value: `1`
   */
  num_images?: number;
  /**
   * Increasing the amount of steps tells Stable Diffusion that it should take more steps
   * to generate your final result which can increase the amount of detail in your image. Default value: `50`
   */
  num_inference_steps?: number;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /**
   * Default value: `"Photographic"`
   */
  style?:
    | "(No style)"
    | "Cinematic"
    | "Disney Character"
    | "Digital Art"
    | "Photographic"
    | "Fantasy art"
    | "Neonpunk"
    | "Enhance"
    | "Comic book"
    | "Lowpoly"
    | "Line art";
  /**
   * Default value: `20`
   */
  style_strength?: number;
};

/**
 * Request shape for editing an image with physics-aware transformations,
 * driven by an editing instruction.
 */
export type PhysicEditInput = {
  /**
   * Enable content safety checking on the output. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Classifier-free guidance scale. Higher values follow the prompt more
   * closely. Default value: `4`
   */
  guidance_scale?: number;
  /**
   * URL of the image to edit with physics-aware transformations.
   */
  image_url: string | Blob | File;
  /**
   * Description of unwanted elements in the output. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Number of denoising steps. More steps generally produce higher quality
   * but take longer. Default value: `40`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The editing instruction describing the physical transformation to apply.
   */
  prompt: string;
  /**
   * Random seed for reproducibility. Same seed and prompt produce identical output.
   */
  seed?: number;
  /**
   * If True, the image will be returned as a data URI.
   */
  sync_mode?: boolean;
};

/**
 * Request shape for running the Pidi detector on an image.
 */
export type PiDiInput = {
  /**
   * Whether to apply the filter to the image.
   */
  apply_filter?: boolean;
  /**
   * URL of the image to process
   */
  image_url: string | Blob | File;
  /**
   * Whether to use the safe version of the Pidi detector
   */
  safe?: boolean;
  /**
   * Whether to use the scribble version of the Pidi detector
   */
  scribble?: boolean;
};

export type piflowInput = {
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The size of the generated image. You can choose between some presets or custom height and width
   * that **must be multiples of 8**. Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `8`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The prompt to generate an image from.
   */
  prompt: string;
  /**
   * Random seed for reproducible generation. If set to None, a random seed will be used.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
};

/**
 * Wrapper around a single image reference.
 */
export type PikaImage = {
  /**
   * The image, given as a URL or as uploadable data (no description is
   * provided upstream).
   */
  image_url: string | Blob | File;
};

/**
 * Input for applying a Pikaffect to an image.
 */
export type PikaV15PikaffectsInput = {
  /**
   * URL of the input image.
   */
  image_url: string | Blob | File;
  /**
   * Negative prompt to guide the model.
   */
  negative_prompt?: string;
  /**
   * The Pikaffect to apply.
   */
  pikaffect:
    | "Cake-ify"
    | "Crumble"
    | "Crush"
    | "Decapitate"
    | "Deflate"
    | "Dissolve"
    | "Explode"
    | "Eye-pop"
    | "Inflate"
    | "Levitate"
    | "Melt"
    | "Peel"
    | "Poke"
    | "Squish"
    | "Ta-da"
    | "Tear";
  /**
   * Text prompt to guide the effect.
   */
  prompt?: string;
  /**
   * The seed for the random number generator.
   */
  seed?: number;
};

/**
 * Input for generating a video that starts from a given first-frame image.
 */
export type PikaV22ImageToVideoInput = {
  /**
   * The duration of the generated video in seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * URL of the image to use as the first frame.
   */
  image_url: string | Blob | File;
  /**
   * A negative prompt to guide the model. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Required text prompt (no description is provided upstream).
   */
  prompt: string;
  /**
   * The resolution of the generated video. Default value: `"720p"`
   */
  resolution?: "720p" | "1080p";
  /**
   * The seed for the random number generator.
   */
  seed?: number;
};

export type PikaV22PikaframesInput = {
  /**
   * URLs of keyframe images (2-5 images) to create transitions between.
   *
   * Typed as strings because the entries are documented as URLs; the
   * original declaration was a bare `Array`, which does not compile.
   */
  image_urls: Array<string>;
  /**
   * A negative prompt to guide the model. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Default prompt for all transitions. Individual transition prompts override this.
   */
  prompt?: string;
  /**
   * The resolution of the generated video. Default value: `"720p"`
   */
  resolution?: "720p" | "1080p";
  /**
   * The seed for the random number generator.
   */
  seed?: number;
  /**
   * Configuration for each transition. Length must be len(image_urls) - 1. Total duration of all transitions must not exceed 25 seconds. If not provided, uses default 5-second transitions with the global prompt.
*/ transitions?: Array; }; export type PikaV22TextToVideoInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "9:16" | "1:1" | "4:5" | "5:4" | "3:2" | "2:3"; /** * The duration of the generated video in seconds Default value: `"5"` */ duration?: "5" | "10"; /** * A negative prompt to guide the model Default value: `"ugly, bad, terrible"` */ negative_prompt?: string; /** * */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "1080p" | "720p"; /** * The seed for the random number generator */ seed?: number; }; export type PikaV2PikadditionsInput = { /** * URL of the image to add */ image_url: string | Blob | File; /** * Negative prompt to guide the model */ negative_prompt?: string; /** * Text prompt describing what to add */ prompt?: string; /** * The seed for the random number generator */ seed?: number; /** * URL of the input video */ video_url: string | Blob | File; }; export type PixartSigmaInput = { /** * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. 
Default value: `35` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * The scheduler to use for the model. Default value: `"DPM-SOLVER"` */ scheduler?: "DPM-SOLVER" | "SA-SOLVER"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * The style to apply to the image. Default value: `"(No style)"` */ style?: "(No style)" | "Cinematic" | "Photographic" | "Anime" | "Manga" | "Digital Art" | "Pixel art" | "Fantasy art" | "Neonpunk" | "3D Model"; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; }; export type PixverseExtendFastInput = { /** * Enable audio generation (BGM, SFX, dialogue). Supported in v5.6+ models. */ generate_audio_switch?: boolean; /** * The model version to use for generation Default value: `"v4.5"` */ model?: "v3.5" | "v4" | "v4.5" | "v5" | "v5.5" | "v5.6" | "v6"; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * Prompt describing how to extend the video */ prompt: string; /** * The resolution of the generated video. Fast mode doesn't support 1080p Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p"; /** * Random seed for generation */ seed?: number; /** * The style of the extended video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * URL of the input video to extend */ video_url: string | Blob | File; }; export type PixverseExtendInput = { /** * The duration of the generated video in seconds. 
1080p videos are limited to 5 seconds Default value: `"5"` */ duration?: "5" | "8"; /** * Enable audio generation (BGM, SFX, dialogue). Supported in v5.6+ models. */ generate_audio_switch?: boolean; /** * The model version to use for generation Default value: `"v4.5"` */ model?: "v3.5" | "v4" | "v4.5" | "v5" | "v5.5" | "v5.6" | "v6"; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * Prompt describing how to extend the video */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * Random seed for generation */ seed?: number; /** * The style of the extended video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * URL of the input video to extend */ video_url: string | Blob | File; }; export type PixverseLipsyncInput = { /** * URL of the input audio. If not provided, TTS will be used. */ audio_url?: string | Blob | File; /** * Text content for TTS when audio_url is not provided */ text?: string; /** * URL of the input video */ video_url: string | Blob | File; /** * Voice to use for TTS when audio_url is not provided Default value: `"Auto"` */ voice_id?: "Emily" | "James" | "Isabella" | "Liam" | "Chloe" | "Adrian" | "Harper" | "Ava" | "Sophia" | "Julia" | "Mason" | "Jack" | "Oliver" | "Ethan" | "Auto"; }; export type PixverseSoundEffectsInput = { /** * Whether to keep the original audio from the video */ original_sound_switch?: boolean; /** * Description of the sound effect to generate. If empty, a random sound effect will be generated Default value: `""` */ prompt?: string; /** * URL of the input video to add sound effects to */ video_url: string | Blob | File; }; export type PixverseSwapInput = { /** * URL of the target image for swapping */ image_url: string | Blob | File; /** * The keyframe ID to use for face/object mapping. 
The input video is normalized to 24 FPS before processing, so keyframe 1 = first frame, keyframe 24 = 1 second in, etc. Valid range: 1 to (duration_seconds * 24). Default value: `1` */ keyframe_id?: number; /** * The swap mode to use Default value: `"person"` */ mode?: "person" | "object" | "background"; /** * Whether to keep the original audio Default value: `true` */ original_sound_switch?: boolean; /** * The output resolution (1080p not supported) Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p"; /** * Random seed for generation */ seed?: number; /** * URL of the external video to swap */ video_url: string | Blob | File; }; export type PixverseV4ImageToVideoFastInput = { /** * The type of camera movement to apply to the video */ camera_movement?: "horizontal_left" | "horizontal_right" | "vertical_up" | "vertical_down" | "zoom_in" | "zoom_out" | "crane_up" | "quickly_zoom_in" | "quickly_zoom_out" | "smooth_zoom_in" | "camera_rotation" | "robo_arm" | "super_dolly_out" | "whip_pan" | "hitchcock" | "left_follow" | "right_follow" | "pan_left" | "pan_right" | "fix_bg"; /** * URL of the image to use as the first frame */ image_url: string | Blob | File; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. 
*/ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; }; export type PixverseV4ImageToVideoInput = { /** * The type of camera movement to apply to the video */ camera_movement?: "horizontal_left" | "horizontal_right" | "vertical_up" | "vertical_down" | "zoom_in" | "zoom_out" | "crane_up" | "quickly_zoom_in" | "quickly_zoom_out" | "smooth_zoom_in" | "camera_rotation" | "robo_arm" | "super_dolly_out" | "whip_pan" | "hitchcock" | "left_follow" | "right_follow" | "pan_left" | "pan_right" | "fix_bg"; /** * The duration of the generated video in seconds. 8s videos cost double. 1080p videos are limited to 5 seconds Default value: `"5"` */ duration?: "5" | "8"; /** * URL of the image to use as the first frame */ image_url: string | Blob | File; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. */ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; }; export type PixverseV4TextToVideoFastInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "4:3" | "1:1" | "3:4" | "9:16"; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. 
*/ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; }; export type PixverseV4TextToVideoInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "4:3" | "1:1" | "3:4" | "9:16"; /** * The duration of the generated video in seconds. 8s videos cost double. 1080p videos are limited to 5 seconds Default value: `"5"` */ duration?: "5" | "8"; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. */ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; }; export type PixverseV55EffectsInput = { /** * The duration of the generated video in seconds Default value: `"5"` */ duration?: "5" | "8" | "10"; /** * The effect to apply to the video */ effect: "Kiss Me AI" | "Kiss" | "Muscle Surge" | "Warmth of Jesus" | "Anything, Robot" | "The Tiger Touch" | "Hug" | "Holy Wings" | "Microwave" | "Zombie Mode" | "Squid Game" | "Baby Face" | "Black Myth: Wukong" | "Long Hair Magic" | "Leggy Run" | "Fin-tastic Mermaid" | "Punch Face" | "Creepy Devil Smile" | "Thunder God" | "Eye Zoom Challenge" | "Who's Arrested?" | "Baby Arrived" | "Werewolf Rage" | "Bald Swipe" | "BOOM DROP" | "Huge Cutie" | "Liquid Metal" | "Sharksnap!" 
| "Dust Me Away" | "3D Figurine Factor" | "Bikini Up" | "My Girlfriends" | "My Boyfriends" | "Subject 3 Fever" | "Earth Zoom" | "Pole Dance" | "Vroom Dance" | "GhostFace Terror" | "Dragon Evoker" | "Skeletal Bae" | "Summoning succubus" | "Halloween Voodoo Doll" | "3D Naked-Eye AD" | "Package Explosion" | "Dishes Served" | "Ocean ad" | "Supermarket AD" | "Tree doll" | "Come Feel My Abs" | "The Bicep Flex" | "London Elite Vibe" | "Flora Nymph Gown" | "Christmas Costume" | "It's Snowy" | "Reindeer Cruiser" | "Snow Globe Maker" | "Pet Christmas Outfit" | "Adopt a Polar Pal" | "Cat Christmas Box" | "Starlight Gift Box" | "Xmas Poster" | "Pet Christmas Tree" | "City Santa Hat" | "Stocking Sweetie" | "Christmas Night" | "Xmas Front Page Karma" | "Grinch's Xmas Hijack" | "Giant Product" | "Truck Fashion Shoot" | "Beach AD" | "Shoal Surround" | "Mechanical Assembly" | "Lighting AD" | "Billboard AD" | "Product close-up" | "Parachute Delivery" | "Dreamlike Cloud" | "Macaron Machine" | "Poster AD" | "Truck AD" | "Graffiti AD" | "3D Figurine Factory" | "The Exclusive First Class" | "Art Zoom Challenge" | "I Quit" | "Hitchcock Dolly Zoom" | "Smell the Lens" | "I believe I can fly" | "Strikout Dance" | "Pixel World" | "Mint in Box" | "Hands up, Hand" | "Flora Nymph Go" | "Somber Embrace" | "Beam me up" | "Suit Swagger"; /** * Optional URL of the image to use as the first frame. If not provided, generates from text */ image_url: string | Blob | File; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * The resolution of the generated video. Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; export type PixverseV55ImageToVideoInput = { /** * The duration of the generated video in seconds. Longer durations cost more. 
1080p videos are limited to 5 or 8 seconds Default value: `"5"` */ duration?: "5" | "8" | "10"; /** * Enable audio generation (BGM, SFX, dialogue) */ generate_audio_switch?: boolean; /** * Enable multi-clip generation with dynamic camera changes */ generate_multi_clip_switch?: boolean; /** * URL of the image to use as the first frame */ image_url: string | Blob | File; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. */ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; export type PixverseV55TextToVideoInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "4:3" | "1:1" | "3:4" | "9:16"; /** * The duration of the generated video in seconds. Longer durations cost more. 1080p videos are limited to 5 or 8 seconds Default value: `"5"` */ duration?: "5" | "8" | "10"; /** * Enable audio generation (BGM, SFX, dialogue) */ generate_audio_switch?: boolean; /** * Enable multi-clip generation with dynamic camera changes */ generate_multi_clip_switch?: boolean; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. 
*/ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; /** * Input fields for a Pixverse V55 transition video between a first frame image and an optional last frame image. */ export type PixverseV55TransitionInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "4:3" | "1:1" | "3:4" | "9:16"; /** * The duration of the generated video in seconds. Longer durations cost more. 1080p videos are limited to 5 or 8 seconds Default value: `"5"` */ duration?: "5" | "8" | "10"; /** * URL of the image to use as the last frame */ end_image_url?: string | Blob | File; /** * URL of the image to use as the first frame */ first_image_url: string | Blob | File; /** * Enable audio generation (BGM, SFX, dialogue) */ generate_audio_switch?: boolean; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * The prompt for the transition */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. */ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; /** * Input fields for Pixverse V56 image-to-video generation. */ export type PixverseV56ImageToVideoInput = { /** * The duration of the generated video in seconds. 
1080p videos are limited to 5 or 8 seconds Default value: `"5"` */ duration?: "5" | "8" | "10"; /** * Enable audio generation (BGM, SFX, dialogue) */ generate_audio_switch?: boolean; /** * URL of the image to use as the first frame */ image_url: string | Blob | File; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * Text prompt for the video generation. NOTE(review): this description was empty; confirm the intended wording. */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. */ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; /** * Input fields for Pixverse V56 text-to-video generation. */ export type PixverseV56TextToVideoInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "4:3" | "1:1" | "3:4" | "9:16"; /** * The duration of the generated video in seconds. 1080p videos are limited to 5 or 8 seconds Default value: `"5"` */ duration?: "5" | "8" | "10"; /** * Enable audio generation (BGM, SFX, dialogue) */ generate_audio_switch?: boolean; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * Text prompt for the video generation. NOTE(review): this description was empty; confirm the intended wording. */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. 
*/ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; /** * Input fields for Pixverse V5 image-to-video generation. */ export type PixverseV5ImageToVideoInput = { /** * The duration of the generated video in seconds. 8s videos cost double. 1080p videos are limited to 5 seconds Default value: `"5"` */ duration?: "5" | "8"; /** * URL of the image to use as the first frame */ image_url: string | Blob | File; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * Text prompt for the video generation. NOTE(review): this description was empty; confirm the intended wording. */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. */ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; }; /** * Input fields for a Pixverse V5 transition video between a first frame image and an optional last frame image. */ export type PixverseV5TransitionInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "4:3" | "1:1" | "3:4" | "9:16"; /** * The duration of the generated video in seconds Default value: `"5"` */ duration?: "5" | "8"; /** * URL of the image to use as the last frame */ end_image_url?: string | Blob | File; /** * URL of the image to use as the first frame */ first_image_url: string | Blob | File; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * The prompt for the transition */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. 
*/ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; }; /** * Input fields for extending an existing video with Pixverse V6. */ export type PixverseV6ExtendInput = { /** * The duration of the generated video in seconds. v6 supports values from 1 to 15 seconds Default value: `5` */ duration?: number; /** * Enable audio generation (BGM, SFX, dialogue). */ generate_audio_switch?: boolean; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * Prompt describing how to extend the video */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * Random seed for generation */ seed?: number; /** * The style of the extended video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * URL of the input video to extend */ video_url: string | Blob | File; }; /** * Input fields for Pixverse V6 image-to-video generation. */ export type PixverseV6ImageToVideoInput = { /** * The duration of the generated video in seconds. v6 supports values from 1 to 15 seconds Default value: `5` */ duration?: number; /** * Enable audio generation (BGM, SFX, dialogue) */ generate_audio_switch?: boolean; /** * Enable multi-clip generation with dynamic camera changes */ generate_multi_clip_switch?: boolean; /** * URL of the image to use as the first frame */ image_url: string | Blob | File; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * Text prompt for the video generation. NOTE(review): this description was empty; confirm the intended wording. */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. 
*/ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; /** * Input fields for Pixverse V6 text-to-video generation. */ export type PixverseV6TextToVideoInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "2:3" | "3:2" | "21:9"; /** * The duration of the generated video in seconds. v6 supports values from 1 to 15 seconds Default value: `5` */ duration?: number; /** * Enable audio generation (BGM, SFX, dialogue) */ generate_audio_switch?: boolean; /** * Enable multi-clip generation with dynamic camera changes */ generate_multi_clip_switch?: boolean; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * Text prompt for the video generation. NOTE(review): this description was empty; confirm the intended wording. */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. */ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; /** * Input fields for a Pixverse V6 transition video between a first frame image and an optional last frame image. */ export type PixverseV6TransitionInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "2:3" | "3:2" | "21:9"; /** * The duration of the generated video in seconds. 
v6 supports values from 1 to 15 seconds Default value: `5` */ duration?: number; /** * URL of the image to use as the last frame */ end_image_url?: string | Blob | File; /** * URL of the image to use as the first frame */ first_image_url: string | Blob | File; /** * Enable audio generation (BGM, SFX, dialogue) */ generate_audio_switch?: boolean; /** * Enable multi-clip generation with dynamic camera changes */ generate_multi_clip_switch?: boolean; /** * Negative prompt to be used for the generation Default value: `""` */ negative_prompt?: string; /** * The prompt for the transition */ prompt: string; /** * The resolution of the generated video Default value: `"720p"` */ resolution?: "360p" | "540p" | "720p" | "1080p"; /** * The same seed and the same prompt given to the same version of the model * will output the same video every time. */ seed?: number; /** * The style of the generated video */ style?: "anime" | "3d_animation" | "clay" | "comic" | "cyberpunk"; /** * Prompt optimization mode: 'enabled' to optimize, 'disabled' to turn off, 'auto' for model decision */ thinking_type?: "enabled" | "disabled" | "auto"; }; /** * Input fields for Playground V25 image generation from a text prompt. */ export type PlaygroundV25Input = { /** * The list of embeddings to use. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The rescale factor for the CFG. */ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3` */ guidance_scale?: number; /** * The size of the generated image. 
Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; }; /** * Input fields for plushify, which generates images from an input image. */ export type plushifyInput = { /** * Whether to enable the safety checker Default value: `true` */ enable_safety_checker?: boolean; /** * Guidance scale for the generation Default value: `3.5` */ guidance_scale?: number; /** * URL of the image to apply cartoon style to */ image_url: string | Blob | File; /** * Number of images to generate Default value: `1` */ num_images?: number; /** * Number of inference steps Default value: `28` */ num_inference_steps?: number; /** * Prompt for the generation. Default is empty which is usually best, but sometimes it can help to add a description of the subject. Default value: `""` */ prompt?: string; /** * Scale factor for the Cartoon effect Default value: `1` */ scale?: number; /** * The seed for image generation. Same seed with same parameters will generate same image. 
*/ seed?: number; /** * Whether to use CFG zero */ use_cfg_zero?: boolean; }; /** * A 2D point given by its x and y coordinates. */ export type Point = { /** * The x coordinate. */ x: number; /** * The y coordinate. */ y: number; }; /** * A point prompt on a specific frame, labelled as foreground or background. */ export type PointPrompt = { /** * The frame index to interact with. */ frame_index?: number; /** * 1 for foreground, 0 for background (typed as the string literals `"1"` and `"0"`). */ label?: "0" | "1"; /** * Optional object identifier. Prompts sharing an object id refine the same object. */ object_id?: number; /** * X Coordinate of the prompt */ x?: number; /** * Y Coordinate of the prompt */ y?: number; }; /** * A point prompt without a frame index, labelled as foreground or background. */ export type PointPromptBase = { /** * 1 for foreground, 0 for background (typed as the string literals `"1"` and `"0"`). */ label?: "0" | "1"; /** * Optional object identifier. Prompts sharing an object id refine the same object. */ object_id?: number; /** * X Coordinate of the prompt */ x?: number; /** * Y Coordinate of the prompt */ y?: number; }; /** * A labelled polygon described by a list of points. */ export type Polygon = { /** * Label of the polygon */ label: string; /** * List of points. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ points: Array; }; /** * Output wrapper holding a list of polygons. */ export type PolygonOutput = { /** * List of polygons. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ polygons: Array; }; /** * Input fields for Pony V7 image generation from a text prompt. */ export type PonyV7Input = { /** * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** * Classifier free guidance scale Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The source of the noise to use for generating images. * If set to 'gpu', the noise will be generated on the GPU. * If set to 'cpu', the noise will be generated on the CPU. Default value: `"gpu"` */ noise_source?: "gpu" | "cpu"; /** * The number of images to generate Default value: `1` */ num_images?: number; /** * The number of inference steps to take Default value: `40` */ num_inference_steps?: number; /** * The format of the generated image. 
Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate images from */ prompt: string; /** * The seed to use for generating images */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input fields for enhancing a portrait image. */ export type PortraitInput = { /** * Aspect ratio for 4K output (default: 3:4 for portraits) */ aspect_ratio?: AspectRatio; /** * Portrait image URL to enhance */ image_url: string | Blob | File; }; /** * Input fields for pose transfer image generation from a person image and a pose image. */ export type PoseTransferInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your input when generating the image. Default value: `2.5` */ guidance_scale?: number; /** * The number of inference steps to perform. Default value: `50` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * Url to the garment image. NOTE(review): the field is named `person_image_url` but the description mentions a garment; confirm which image is expected. */ person_image_url: string | Blob | File; /** * Url for the human image. NOTE(review): the field is named `pose_image_url` but the description does not mention a pose; confirm which image is expected. */ pose_image_url: string | Blob | File; /** * The same seed and the same input given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; }; /** * Input fields for image post-processing; each effect has an `enable_*` flag plus its own tuning parameters. */ export type PostProcessingInput = { /** * Blue channel shift direction Default value: `"horizontal"` */ blue_direction?: "horizontal" | "vertical"; /** * Blue channel shift amount */ blue_shift?: number; /** * Blur radius Default value: `3` */ blur_radius?: number; /** * Sigma for Gaussian blur Default value: `1` */ blur_sigma?: number; /** * Type of blur to apply Default value: `"gaussian"` */ blur_type?: "gaussian" | "kuwahara"; /** * Brightness adjustment */ brightness?: number; /** * CAS sharpening amount Default value: `0.8` */ cas_amount?: number; /** * Contrast adjustment */ contrast?: number; /** * Desaturation factor Default value: `1` */ desaturate_factor?: number; /** * Desaturation method Default value: `"luminance (Rec.709)"` */ desaturate_method?: "luminance (Rec.709)" | "luminance (Rec.601)" | "average" | "lightness"; /** * Dissolve blend factor Default value: `0.5` */ dissolve_factor?: number; /** * URL of second image for dissolve Default value: `""` */ dissolve_image_url?: string | Blob | File; /** * Dodge and burn intensity Default value: `0.5` */ dodge_burn_intensity?: number; /** * Dodge and burn mode Default value: `"dodge"` */ dodge_burn_mode?: "dodge" | "burn" | "dodge_and_burn" | "burn_and_dodge" | "color_dodge" | "color_burn" | "linear_dodge" | "linear_burn"; /** * Enable blur effect */ enable_blur?: boolean; /** * Enable chromatic aberration */ enable_chromatic?: boolean; /** * Enable color correction */ enable_color_correction?: boolean; /** * Enable desaturation effect */ enable_desaturate?: boolean; /** * Enable dissolve effect */ enable_dissolve?: boolean; /** * Enable dodge and burn effect */ enable_dodge_burn?: boolean; /** * Enable glow effect */ enable_glow?: boolean; /** * Enable film grain effect */ enable_grain?: boolean; /** * Enable parabolize effect */ enable_parabolize?: boolean; /** * Enable sharpen effect */ enable_sharpen?: boolean; /** * Enable solarize effect */ enable_solarize?: boolean; /** 
* Enable color tint effect */ enable_tint?: boolean; /** * Enable vignette effect */ enable_vignette?: boolean; /** * Gamma adjustment Default value: `1` */ gamma?: number; /** * Glow intensity Default value: `1` */ glow_intensity?: number; /** * Glow blur radius Default value: `5` */ glow_radius?: number; /** * Film grain intensity (when enabled) Default value: `0.4` */ grain_intensity?: number; /** * Film grain scale (when enabled) Default value: `10` */ grain_scale?: number; /** * Style of film grain to apply Default value: `"modern"` */ grain_style?: "modern" | "analog" | "kodak" | "fuji" | "cinematic" | "newspaper"; /** * Green channel shift direction Default value: `"horizontal"` */ green_direction?: "horizontal" | "vertical"; /** * Green channel shift amount */ green_shift?: number; /** * URL of image to process */ image_url: string | Blob | File; /** * Noise radius for smart sharpen Default value: `7` */ noise_radius?: number; /** * Parabolize coefficient Default value: `1` */ parabolize_coeff?: number; /** * Edge preservation factor Default value: `0.75` */ preserve_edges?: number; /** * Red channel shift direction Default value: `"horizontal"` */ red_direction?: "horizontal" | "vertical"; /** * Red channel shift amount */ red_shift?: number; /** * Saturation adjustment */ saturation?: number; /** * Sharpen strength (for basic mode) Default value: `1` */ sharpen_alpha?: number; /** * Type of sharpening to apply Default value: `"basic"` */ sharpen_mode?: "basic" | "smart" | "cas"; /** * Sharpen radius (for basic mode) Default value: `1` */ sharpen_radius?: number; /** * Smart sharpen blend ratio Default value: `0.5` */ smart_sharpen_ratio?: number; /** * Smart sharpen strength Default value: `5` */ smart_sharpen_strength?: number; /** * Solarize threshold Default value: `0.5` */ solarize_threshold?: number; /** * Color temperature adjustment */ temperature?: number; /** * Tint color mode Default value: `"sepia"` */ tint_mode?: "sepia" | "red" | "green" | 
"blue" | "cyan" | "magenta" | "yellow" | "purple" | "orange" | "warm" | "cool" | "lime" | "navy" | "vintage" | "rose" | "teal" | "maroon" | "peach" | "lavender" | "olive"; /** * Tint strength Default value: `1` */ tint_strength?: number; /** * Vertex X position Default value: `0.5` */ vertex_x?: number; /** * Vertex Y position Default value: `0.5` */ vertex_y?: number; /** * Vignette strength (when enabled) Default value: `0.5` */ vignette_strength?: number; }; /** * Input fields for generating an image of a person holding a product. */ export type ProductHoldingInput = { /** * Aspect ratio for 4K output */ aspect_ratio?: AspectRatio; /** * Image URL of the person who will hold the product */ person_image_url: string | Blob | File; /** * Image URL of the product to be held by the person */ product_image_url: string | Blob | File; }; /** * Input fields for creating professional studio photography of a product. */ export type ProductPhotographyInput = { /** * Aspect ratio for 4K output */ aspect_ratio?: AspectRatio; /** * Image URL of the product to create professional studio photography */ product_image_url: string | Blob | File; }; /** * Input fields for placing a product shot into a generated lifestyle scene. */ export type ProductShotInput = { /** * Whether to use the fast model Default value: `true` */ fast?: boolean; /** * The URL of the product shot to be placed in a lifestyle shot. If both image_url and image_file are provided, image_url will be used. Accepted formats are jpeg, jpg, png, webp. Maximum file size 12MB. */ image_url: string | Blob | File; /** * If you've selected placement_type=manual_placement, you should use this parameter to specify which placements/positions you would like to use from the list. You can select more than one placement in one request. Default value: `"bottom_center"` */ manual_placement_selection?: "upper_left" | "upper_right" | "bottom_left" | "bottom_right" | "right_center" | "left_center" | "upper_center" | "bottom_center" | "center_vertical" | "center_horizontal"; /** * The number of lifestyle product shots you would like to generate. 
You will get num_results x 10 results when placement_type=automatic and according to the number of required placements x num_results if placement_type=manual_placement. Default value: `1` */ num_results?: number; /** * Whether to optimize the scene description Default value: `true` */ optimize_description?: boolean; /** * This flag is only relevant when placement_type=original. If true, the output image retains the original input image's size; otherwise, the image is scaled to 1 megapixel (1MP) while preserving its aspect ratio. */ original_quality?: boolean; /** * The desired padding in pixels around the product, when using placement_type=manual_padding. The order of the values is [left, right, top, bottom]. For optimal results, the total number of pixels, including padding, should be around 1,000,000. It is recommended to first use the product cutout API, get the cutout and understand the size of the result, and then define the required padding and use the cutout as an input for this API. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ padding_values?: Array; /** * This parameter allows you to control the positioning of the product in the image. Choosing 'original' will preserve the original position of the product in the image. Choosing 'automatic' will generate results with the 10 recommended positions for the product. Choosing 'manual_placement' will allow you to select predefined positions (using the parameter 'manual_placement_selection'). Selecting 'manual_padding' will allow you to control the position and size of the image by defining the desired padding in pixels around the product. Default value: `"manual_placement"` */ placement_type?: "original" | "automatic" | "manual_placement" | "manual_padding"; /** * The URL of the reference image to be used for generating the new scene or background for the product shot. Use "" to leave empty. Either ref_image_url or scene_description has to be provided but not both. If both ref_image_url and ref_image_file are provided, ref_image_url will be used. 
Accepted formats are jpeg, jpg, png, webp. Default value: `""` */ ref_image_url?: string | Blob | File; /** * Text description of the new scene or background for the provided product shot. Bria currently supports prompts in English only, excluding special characters. */ scene_description?: string; /** * The desired size of the final product shot. For optimal results, the total number of pixels should be around 1,000,000. This parameter is only relevant when placement_type=automatic or placement_type=manual_placement. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ shot_size?: Array; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input fields for generating a 3D model from a front view image plus optional additional view images. */ export type ProImageTo3DInput = { /** * Optional back/rear view image URL (JPG/PNG recommended). */ back_image_url?: string | Blob | File; /** * Optional bottom view image URL (v3.1 exclusive, JPG/PNG recommended). */ bottom_image_url?: string | Blob | File; /** * Enable PBR material generation (metallic, roughness, normal textures). Ignored when generate_type is Geometry. */ enable_pbr?: boolean; /** * Target polygon face count. Range: 40,000-1,500,000. Default: 500,000. Default value: `500000` */ face_count?: number; /** * Generation task type. Normal: textured model. Geometry: geometry-only white model (no textures). LowPoly/Sketch are not available in v3.1. Default value: `"Normal"` */ generate_type?: "Normal" | "Geometry"; /** * Front view image URL. Resolution: 128-5000px, max 8MB, formats: JPG/PNG/WEBP. Tips: simple background, single object, object >50% of frame. */ input_image_url: string | Blob | File; /** * Optional left-front 45 degree angle view image URL (v3.1 exclusive, JPG/PNG recommended). */ left_front_image_url?: string | Blob | File; /** * Optional left side view image URL (JPG/PNG recommended). */ left_image_url?: string | Blob | File; /** * Optional right-front 45 degree angle view image URL (v3.1 exclusive, JPG/PNG recommended). 
*/ right_front_image_url?: string | Blob | File; /** * Optional right side view image URL (JPG/PNG recommended). */ right_image_url?: string | Blob | File; /** * Optional top view image URL (v3.1 exclusive, JPG/PNG recommended). */ top_image_url?: string | Blob | File; }; /** * Input fields for Hailuo 02 pro image-to-video generation. */ export type ProImageToVideoHailuo02Input = { /** * Optional URL of the image to use as the last frame of the video */ end_image_url?: string | Blob | File; /** * URL of the input image. NOTE(review): this description was empty; presumably the first frame, since `end_image_url` is the last frame; confirm. */ image_url: string | Blob | File; /** * Text prompt for the video generation. NOTE(review): this description was empty; confirm the intended wording. */ prompt: string; /** * Whether to use the model's prompt optimizer Default value: `true` */ prompt_optimizer?: boolean; }; /** * Input fields for Hailuo 23 pro image-to-video generation. */ export type ProImageToVideoHailuo23Input = { /** * URL of the image to use as the first frame */ image_url: string | Blob | File; /** * Text prompt for video generation */ prompt: string; /** * Whether to use the model's prompt optimizer Default value: `true` */ prompt_optimizer?: boolean; }; /** * Input fields for pro image-to-video generation from a first frame image and a text prompt. */ export type ProImageToVideoInput = { /** * The aspect ratio of the generated video Default value: `"auto"` */ aspect_ratio?: "auto" | "9:16" | "16:9"; /** * Up to two character IDs (from create-character) to use in the video. Refer to characters by name in the prompt. When set, only the OpenAI provider is used. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ character_ids?: Array; /** * Whether to delete the video after generation for privacy reasons. If True, the video cannot be used for remixing and will be permanently deleted. Default value: `true` */ delete_video?: boolean; /** * If enabled, the prompt (and image for image-to-video) will be checked for known intellectual property references and the request will be blocked if any are detected. 
*/ detect_and_block_ip?: boolean; /** * Duration of the generated video in seconds Default value: `"4"` */ duration?: "4" | "8" | "12" | "16" | "20"; /** * The URL of the image to use as the first frame */ image_url: string | Blob | File; /** * The text prompt describing the video you want to generate */ prompt: string; /** * The resolution of the generated video Default value: `"auto"` */ resolution?: "auto" | "720p" | "1080p" | "true_1080p"; }; /** * Input fields for removing an object, described by a text prompt, from an image. */ export type PromptInput = { /** * The URL of the image to remove objects from. */ image_url: string | Blob | File; /** * Amount of pixels to expand the mask by. Range: 0-50 Default value: `15` */ mask_expansion?: number; /** * Model quality level to use. Default value: `"best_quality"` */ model?: "low_quality" | "medium_quality" | "high_quality" | "best_quality"; /** * Text description of the object to remove. */ prompt: string; }; /** * Structured description of a single object in an image. */ export type PromptObject = { /** * The action of the object in the image. */ action?: string; /** * The appearance details of the object. */ appearance_details?: string; /** * Bounding boxes defining the location of the object in the image. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ bounding_boxes?: Array; /** * The clothing of the object in the image. */ clothing?: string; /** * A list of colors associated with the object. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ colors?: Array; /** * A description of the object to be generated. */ description?: string; /** * The expression of the object in the image. */ expression?: string; /** * The gender of the object in the image. */ gender?: string; /** * The location of the object in the image. */ location?: string; /** * The number of objects in the image. */ number_of_objects?: number; /** * The orientation of the object in the image. */ orientation?: string; /** * The pose of the object in the image. */ pose?: string; /** * The relationship of the object to other objects in the image. */ relationship?: string; /** * The relative distance of the object from the camera or viewer. 
*/ relative_distance?: number; /** * The relative size of the object in the image. */ relative_size?: string; /** * The shape and color of the object. */ shape_and_color?: string; /** * The RGB color values for the skin tone of the object. */ skin_color?: Color; /** * The skin tone and texture of the object in the image. */ skin_tone_and_texture?: string; /** * The texture of the object. */ texture?: string; }; /** * A list of pronunciation replacements. */ export type PronunciationDict = { /** * List of pronunciation replacements in format ['text/(pronunciation)', ...]. For Chinese, tones are 1-5. Example: ['燕少飞/(yan4)(shao3)(fei1)'] NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ tone_list?: Array; }; /** * Reference to a pronunciation dictionary and, optionally, a specific version of it. */ export type PronunciationDictionaryLocator = { /** * The ID of the pronunciation dictionary. */ pronunciation_dictionary_id?: string; /** * The ID of the version of the pronunciation dictionary. If not provided, the latest version will be used. */ version_id?: string; }; /** * Input fields for generating 3D content from a text description. */ export type ProTextTo3DInput = { /** * Enable PBR material generation (metallic, roughness, normal textures). Ignored when generate_type is Geometry. */ enable_pbr?: boolean; /** * Target polygon face count. Range: 40,000-1,500,000. Default: 500,000. Default value: `500000` */ face_count?: number; /** * Generation task type. Normal: textured model. Geometry: geometry-only white model (no textures). LowPoly/Sketch are not available in v3.1. Default value: `"Normal"` */ generate_type?: "Normal" | "Geometry"; /** * Text description of the 3D content to generate. Max 1024 UTF-8 characters. */ prompt: string; }; /** * Input fields for pro text-to-video generation. */ export type ProTextToVideoInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "9:16" | "16:9"; /** * Up to two character IDs (from create-character) to use in the video. Refer to characters by name in the prompt. When set, only the OpenAI provider is used. NOTE(review): bare `Array` has no element type argument here; restore the intended element type. */ character_ids?: Array; /** * Whether to delete the video after generation for privacy reasons. If True, the video cannot be used for remixing and will be permanently deleted. 
Default value: `true` */ delete_video?: boolean; /** * If enabled, the prompt (and image for image-to-video) will be checked for known intellectual property references and the request will be blocked if any are detected. */ detect_and_block_ip?: boolean; /** * Duration of the generated video in seconds Default value: `"4"` */ duration?: "4" | "8" | "12" | "16" | "20"; /** * The text prompt describing the video you want to generate */ prompt: string; /** * The resolution of the generated video Default value: `"1080p"` */ resolution?: "720p" | "1080p" | "true_1080p"; }; /** * Input fields for pshuman, which takes an image of a person. */ export type pshumanInput = { /** * Guidance scale for the diffusion process. Controls how much the output adheres to the generated views. Default value: `4` */ guidance_scale?: number; /** * A direct URL to the input image of a person. */ image_url: string | Blob | File; /** * Seed for reproducibility. If None, a random seed will be used. */ seed?: number; }; /** * Output of pshuman: the generated 3D model and a preview image. */ export type pshumanOutput = { /** * The generated 3D model in OBJ format. */ model_obj: File; /** * A preview image showing the input and the generated multi-view outputs. 
*/ preview_image: File; }; export type pulidInput = { /** * Guidance scale Default value: `1.2` */ guidance_scale?: number; /** * Turn this on to mix two ID images; otherwise, leave it off. */ id_mix?: boolean; /** * ID scale Default value: `0.8` */ id_scale?: number; /** * Size of the generated image */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Mode of generation Default value: `"fidelity"` */ mode?: "fidelity" | "extreme style"; /** * Negative prompt to generate the face from Default value: `"flaws in the eyes, flaws in the face, flaws, lowres, non-HDRi, low quality, worst quality,artifacts noise, text, watermark, glitch, deformed, mutated, ugly, disfigured, hands, low resolution, partially rendered objects, deformed or partially rendered eyes, deformed, deformed eyeballs, cross-eyed,blurry"` */ negative_prompt?: string; /** * Number of images to generate Default value: `1` */ num_images?: number; /** * Number of steps to take Default value: `4` */ num_inference_steps?: number; /** * Prompt to generate the face from */ prompt: string; /** * List of reference faces, ideally 4 images. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ reference_images: Array; /** * Random seed for reproducibility */ seed?: number; }; export type pulidOutput = { /** * List of generated images. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. 
*/ images: Array; /** * Random seed used for reproducibility */ seed: number; }; export type QueryInput = { /** * Image URL to be processed */ image_url: string | Blob | File; /** * Maximum number of tokens to generate Default value: `64` */ max_tokens?: number; /** * Prompt for query task */ prompt: string; /** * Type of task to perform Default value: `"caption"` */ task_type?: "caption" | "query"; }; export type Qwen3CloneVoiceInput = { /** * URL to the reference audio file used for voice cloning. */ audio_url: string | Blob | File; /** * Optional reference text that was used when creating the speaker embedding. 
Providing this can improve synthesis quality when using a cloned voice. */ reference_text?: string; }; export type Qwen3CloneVoiceOutput = { /** * The generated speaker embedding file in safetensors format. */ speaker_embedding: File; }; export type Qwen3DesignVoiceInput = { /** * The language of the voice to be designed. Default value: `"Auto"` */ language?: "Auto" | "English" | "Chinese" | "Spanish" | "French" | "German" | "Italian" | "Japanese" | "Korean" | "Portuguese" | "Russian"; /** * Maximum number of new codec tokens to generate. Default value: `200` */ max_new_tokens?: number; /** * Prompt to guide the style of the generated speech. NOTE(review): the upstream description calls this prompt optional, but the field is declared required here; confirm which is intended. */ prompt: string; /** * Penalty to reduce repeated tokens/codes. Default value: `1.05` */ repetition_penalty?: number; /** * Sampling switch for the sub-talker. Default value: `true` */ subtalker_dosample?: boolean; /** * Temperature for sub-talker sampling. Default value: `0.9` */ subtalker_temperature?: number; /** * Top-k for sub-talker sampling. Default value: `50` */ subtalker_top_k?: number; /** * Top-p for sub-talker sampling. Default value: `1` */ subtalker_top_p?: number; /** * Sampling temperature; higher => more random. Default value: `0.9` */ temperature?: number; /** * The text to be converted to speech. */ text: string; /** * Top-k sampling parameter. Default value: `50` */ top_k?: number; /** * Top-p sampling parameter. Default value: `1` */ top_p?: number; }; export type Qwen3GuardInput = { /** * The input text to be classified */ prompt: string; }; export type Qwen3GuardOutput = { /** * The list of category names assigned by the classification. NOTE(review): the upstream description reads "The confidence score of the classification", which does not match this type; confirm. 
*/ categories: Array<"Violent" | "Non-violent Illegal Acts" | "Sexual Content or Sexual Acts" | "PII" | "Suicide & Self-Harm" | "Unethical Acts" | "Politically Sensitive Topics" | "Copyright Violation" | "Jailbreak" | "None">; /** * The classification label */ label: "Safe" | "Unsafe" | "Controversial"; }; export type Qwen3TTSInput = { /** * The language of the voice. 
Default value: `"Auto"` */ language?: "Auto" | "English" | "Chinese" | "Spanish" | "French" | "German" | "Italian" | "Japanese" | "Korean" | "Portuguese" | "Russian"; /** * Maximum number of new codec tokens to generate. Default value: `200` */ max_new_tokens?: number; /** * Optional prompt to guide the style of the generated speech. This prompt will be ignored if a speaker embedding is provided. */ prompt?: string; /** * Optional reference text that was used when creating the speaker embedding. Providing this can improve synthesis quality when using a cloned voice. */ reference_text?: string; /** * Penalty to reduce repeated tokens/codes. Default value: `1.05` */ repetition_penalty?: number; /** * URL to a speaker embedding file in safetensors format, from `fal-ai/qwen-3-tts/clone-voice` endpoint. If provided, the TTS model will use the cloned voice for synthesis instead of the predefined voices. */ speaker_voice_embedding_file_url?: string | Blob | File; /** * Sampling switch for the sub-talker. Default value: `true` */ subtalker_dosample?: boolean; /** * Temperature for sub-talker sampling. Default value: `0.9` */ subtalker_temperature?: number; /** * Top-k for sub-talker sampling. Default value: `50` */ subtalker_top_k?: number; /** * Top-p for sub-talker sampling. Default value: `1` */ subtalker_top_p?: number; /** * Sampling temperature; higher => more random. Default value: `0.9` */ temperature?: number; /** * The text to be converted to speech. */ text: string; /** * Top-k sampling parameter. Default value: `50` */ top_k?: number; /** * Top-p sampling parameter. Default value: `1` */ top_p?: number; /** * The voice to be used for speech synthesis, will be ignored if a speaker embedding is provided. Check out the **[documentation](https://github.com/QwenLM/Qwen3-TTS/tree/main?tab=readme-ov-file#custom-voice-generate)** for each voice's details and which language they primarily support. 
*/ voice?: "Vivian" | "Serena" | "Uncle_Fu" | "Dylan" | "Eric" | "Ryan" | "Aiden" | "Ono_Anna" | "Sohee"; }; export type QwenImage2EditInput = { /** * Enable LLM prompt optimization for better results. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Enable content moderation for input and output. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. If not provided, the size of the final input image will be used. Total number of pixels must be between 512x512 and 2048x2048. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Reference images for editing (1-3 images required). Order matters: reference as 'image 1', 'image 2', 'image 3' in prompt. Resolution: 384-5000px each dimension. Max size: 10MB each. Formats: JPEG, JPG, PNG (no alpha), WEBP. Each entry follows the same `string | Blob | File` convention used for single image URLs in this file. */ image_urls: Array<string | Blob | File>; /** * Content to avoid in the generated image. Max 500 characters. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * Text prompt describing the desired image. Supports Chinese and English. Max 800 characters. */ prompt: string; /** * Random seed for reproducibility (0-2147483647). */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type QwenImage2TextToImageInput = { /** * Enable LLM prompt optimization for better results. Default value: `true` */ enable_prompt_expansion?: boolean; /** * Enable content moderation for input and output. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. 
Total number of pixels must be between 512x512 and 2048x2048. 
Default value: `square_hd` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Content to avoid in the generated image. Max 500 characters. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * Text prompt describing the desired image. Supports Chinese and English. */ prompt: string; /** * Random seed for reproducibility (0-2147483647). */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type QwenImageEdit2511MultipleAnglesInput = { /** * Acceleration level for image generation. Default value: `"regular"` */ acceleration?: "none" | "regular"; /** * Additional text to append to the automatically generated prompt. */ additional_prompt?: string; /** * Whether to enable the safety checker. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale. Default value: `4.5` */ guidance_scale?: number; /** * Horizontal rotation angle around the object in degrees. 0°=front view, 90°=right side, 180°=back view, 270°=left side, 360°=front view again. */ horizontal_angle?: number; /** * The size of the generated image. If not provided, the size of the input image will be used. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URLs of the image(s) to adjust camera angle for. Each entry follows the same `string | Blob | File` convention used for single image URLs in this file. */ image_urls: Array<string | Blob | File>; /** * The scale factor for the LoRA model. Controls the strength of the camera control effect. 
Default value: `1` */ lora_scale?: number; /** * The negative prompt for the generation Default value: `""` */ negative_prompt?: string; /** * Number of images to generate Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the output image Default value: `"png"` */ output_format?: "png" | "jpeg" | "webp"; /** * Random seed for reproducibility. */ seed?: number; /** * If `True`, the media will be returned as a data URI. */ sync_mode?: boolean; /** * Vertical camera angle in degrees. -30°=low-angle shot (looking up), 0°=eye-level, 30°=elevated, 60°=high-angle, 90°=bird's-eye view (looking down). */ vertical_angle?: number; /** * Camera zoom/distance. 0=wide shot (far away), 5=medium shot (normal), 10=close-up (very close). Default value: `5` */ zoom?: number; }; export type QwenImageEditInpaintInput = { /** * Acceleration level for image generation. Options: 'none', 'regular', 'high'. Higher acceleration increases speed. 'regular' balances speed and quality. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to edit. */ image_url: string | Blob | File; /** * The URL of the mask for inpainting */ mask_url: string | Blob | File; /** * The negative prompt for the generation Default value: `" "` */ negative_prompt?: string; /** * The number of images to generate. 
Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `30` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate the image with */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Strength of noising process for inpainting Default value: `0.93` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type QwenImageEditInput = { /** * Acceleration level for image generation. Options: 'none', 'regular', 'high'. Higher acceleration increases speed. 'regular' balances speed and quality. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to edit. */ image_url: string | Blob | File; /** * The negative prompt for the generation Default value: `" "` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `30` */ num_inference_steps?: number; /** * The format of the generated image. 
Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate the image with */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type QwenImageEditLoraInput = { /** * Acceleration level for image generation. Options: 'none', 'regular', 'high'. Higher acceleration increases speed. 'regular' balances speed and quality. Default value: `"none"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** * The size of the generated image. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to edit. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use up to 3 LoRAs * and they will be merged together to generate the final image. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ loras?: Array; /** * The negative prompt for the generation Default value: `" "` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `30` */ num_inference_steps?: number; /** * The format of the generated image. 
Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate the image with */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. 
*/ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type QwenImageI2IInput = { /** * Acceleration level for image generation. Options: 'none', 'regular', 'high'. Higher acceleration increases speed. 'regular' balances speed and quality. 'high' is recommended for images without text. Default value: `"none"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The size of the generated image. By default, we will use the provided image for determining the image_size. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The reference image to guide the generation. */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use up to 3 LoRAs * and they will be merged together to generate the final image. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ loras?: Array; /** * The negative prompt for the generation Default value: `" "` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `30` */ num_inference_steps?: number; /** * The format of the generated image. 
Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate the image with */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Denoising strength. 1.0 = fully remake; 0.0 = preserve original. 
Default value: `0.6` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Enable turbo mode for faster generation with high quality. When enabled, uses optimized settings (10 steps, CFG=1.2). */ use_turbo?: boolean; }; export type QwenImageInput = { /** * Acceleration level for image generation. Options: 'none', 'regular', 'high'. Higher acceleration increases speed. 'regular' balances speed and quality. 'high' is recommended for images without text. Default value: `"none"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The LoRAs to use for the image generation. You can use up to 3 LoRAs * and they will be merged together to generate the final image. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ loras?: Array; /** * The negative prompt for the generation Default value: `" "` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `30` */ num_inference_steps?: number; /** * The format of the generated image. 
Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to generate the image with */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. 
*/ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Enable turbo mode for faster generation with high quality. When enabled, uses optimized settings (10 steps, CFG=1.2). */ use_turbo?: boolean; }; export type QwenImageLayeredInput = { /** * The acceleration level to use. Default value: `"regular"` */ acceleration?: "none" | "regular" | "high"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The guidance scale to use for the image generation. Default value: `5` */ guidance_scale?: number; /** * The URL of the input image. */ image_url: string | Blob | File; /** * The negative prompt to generate an image from. Default value: `""` */ negative_prompt?: string; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The number of layers to generate. Default value: `4` */ num_layers?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "png" | "webp"; /** * A caption for the input image. */ prompt?: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type QwenImageLayeredOutput = { /** * Whether the generated images contain NSFW concepts. */ has_nsfw_concepts: Array<boolean>; /** * The generated image files info. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ images: Array; /** * The prompt used to generate the image. */ prompt?: string; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. 
*/ seed: number; /** * */ timings: unknown; }; export type QwenImageTrainerInput = { /** * URL to zip archive with images for training. The archive should contain images and corresponding text files with captions. * Each text file should have the same name as the image file it corresponds to (e.g., image1.jpg and image1.txt). * If text files are missing for some images, you can provide a trigger_phrase to automatically create them. * Supported image formats: PNG, JPG, JPEG, WEBP. * Try to use at least 10 images, although more is better. */ image_data_url: string | Blob | File; /** * Learning rate for training. Default is 5e-4 Default value: `0.0005` */ learning_rate?: number; /** * Total number of training steps to perform. Default value: `1000` NOTE(review): the upstream description also states "Default is 4000", which conflicts with the documented default value; confirm which is correct. */ steps?: number; /** * Default caption to use for images that don't have corresponding text files. If provided, missing .txt files will be created automatically. Default value: `""` */ trigger_phrase?: string; }; export type RapidImageTo3DInput = { /** * Generate geometry-only white model without textures. When enabled, enable_pbr is ignored and OBJ is not supported (default output is GLB). */ enable_geometry?: boolean; /** * Enable PBR material generation (metallic, roughness, normal textures). Does not take effect when enable_geometry is True. */ enable_pbr?: boolean; /** * Front view image URL. Resolution: 128-5000px, max 8MB (recommended ≤6MB for base64 encoding), formats: JPG/PNG/WEBP. Tips: simple background, single object, object >50% of frame. */ input_image_url: string | Blob | File; }; export type RapidImageTo3DOutput = { /** * MTL material file for the OBJ model. */ material_mtl?: File; /** * Generated 3D model file. Contains GLB if available, otherwise OBJ. */ model_glb?: File; /** * URLs for different 3D model formats. */ model_urls: ModelUrls; /** * Texture image for the 3D model. 
*/ texture?: File; /** * Preview thumbnail of the generated model */ thumbnail?: File; }; export type RapidTextTo3DInput = { /** * Generate geometry-only white model without textures. When enabled, enable_pbr is ignored and OBJ is not supported (default output is GLB). */ enable_geometry?: boolean; /** * Enable PBR material generation (metallic, roughness, normal textures). Does not take effect when enable_geometry is True. */ enable_pbr?: boolean; /** * Text description of the 3D content to generate. Max 200 UTF-8 characters. */ prompt: string; }; export type RapidTextTo3DOutput = { /** * MTL material file for the OBJ model. */ material_mtl?: File; /** * Generated 3D model in OBJ format. */ model_obj?: File; /** * URLs for different 3D model formats. */ model_urls: ModelUrls; /** * Texture image for the 3D model. */ texture?: File; /** * Preview thumbnail of the generated model */ thumbnail?: File; }; export type RawImage = { /** * */ content: string; /** * Default value: `"image/jpeg"` */ content_type?: string; /** * */ height: number; /** * */ width: number; }; export type React1Input = { /** * URL to the input audio. Must be **15 seconds or shorter**. */ audio_url: string | Blob | File; /** * Emotion prompt for the generation. Currently supports single-word emotions only. */ emotion: "happy" | "angry" | "sad" | "neutral" | "disgusted" | "surprised"; /** * Lipsync mode when audio and video durations are out of sync. Default value: `"bounce"` */ lipsync_mode?: "cut_off" | "loop" | "bounce" | "silence" | "remap"; /** * Controls the edit region and movement scope for the model. Available options: * - `lips`: Only lipsync using react-1 (minimal facial changes). * - `face`: Lipsync + facial expressions without head movements. * - `head`: Lipsync + facial expressions + natural talking head movements. Default value: `"face"` */ model_mode?: "lips" | "face" | "head"; /** * Controls the expressiveness of the lipsync. 
Default value: `0.5` */ temperature?: number; /** * URL to the input video. Must be **15 seconds or shorter**. */ video_url: string | Blob | File; }; export type React1Output = { /** * The generated video with synchronized lip and facial movements. */ video: VideoFile; }; export type RealismInput = { /** * Whether to enable the safety checker for the generated image. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * URL of the image to enhance with realism details. */ image_url: string | Blob | File; /** * The scale factor for the LoRA model. Controls the strength of the LoRA effect. Default value: `0.6` */ lora_scale?: number; /** * Number of inference steps for sampling. Default value: `30` */ num_inference_steps?: number; /** * The same seed and the same prompt given to the same version of the model will output the same image every time. */ seed?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type RealisticVisionImageToImageInput = { /** * If set to true, the output is cropped to the proper aspect ratio after generating. */ crop_output?: boolean; /** * The list of embeddings to use. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The rescale factor for the CFG. 
*/ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ loras?: Array; /** * The Realistic Vision model to use. */ model_name?: string; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * If set to true, the aspect ratio of the generated image will be preserved even * if the image size is too large. However, if the image is not a multiple of 32 * in width or height, it will be resized to the nearest multiple of 32. By default, * this snapping to the nearest multiple of 32 will not preserve the aspect ratio. * Set crop_output to True, to crop the output to the proper aspect ratio * after generating. */ preserve_aspect_ratio?: boolean; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. 
Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * determines how much the generated image resembles the initial image Default value: `0.95` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type RealisticVisionInpaintingInput = { /** * The list of embeddings to use. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The rescale factor for the CFG. */ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** * The list of LoRA weights to use. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ loras?: Array; /** * The URL of the mask to use for inpainting. */ mask_url: string | Blob | File; /** * The Realistic Vision model to use. */ model_name?: string; /** * The negative prompt to use. Use it to address details that you don't want * in the image. 
This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. 
Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** * An id bound to a request, can be used with response to identify the request * itself. Default value: `""` */ request_id?: string; /** * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ safety_checker_version?: "v1" | "v2"; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** * determines how much the generated image resembles the initial image Default value: `0.95` */ strength?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type RealisticVisionInput = { /** * The list of embeddings to use. NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * If set to true, the prompt will be expanded with additional prompts. */ expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** * The rescale factor for the CFG. */ guidance_rescale?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `5` */ guidance_scale?: number; /** * */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The list of LoRA weights to use. 
NOTE(review): `Array` is missing its element type argument here; confirm the intended element type. */ loras?: Array; /** * The Realistic Vision model to use. */ model_name?: string; /** * The negative prompt to use. 
Use it to address details that you don't want in the image. Default value: `"(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)"`
   */
  negative_prompt?: string;
  /**
   * How many images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * How many inference steps to run. Default value: `35`
   */
  num_inference_steps?: number;
  /**
   * Text prompt describing the image to generate. Be as descriptive as possible for best results.
   */
  prompt: string;
  /**
   * An id bound to a request; it can be used with the response to identify the
   * request itself. Default value: `""`
   */
  request_id?: string;
  /**
   * Safety checker version. v1 is the default CompVis safety checker; v2 uses a custom ViT model. Default value: `"v1"`
   */
  safety_checker_version?: "v1" | "v2";
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Input for realtime image editing. Only `image_url` is required.
 */
export type RealtimeEditInput = {
  /**
   * Enable RIFE frame interpolation between consecutive frames (doubles output frames).
   */
  enable_interpolation?: boolean;
  /**
   * The size of the generated image. square=768x768, square_hd=1024x1024.
Default value: `"square"` */
  image_size?: "square" | "square_hd";
  /**
   * Base64-encoded image data URI for editing. CDN URLs are not supported for realtime. For optimal performance, use 704x704 JPEG images with 50% quality. Other sizes will be resized automatically.
   */
  image_url: string | Blob | File;
  /**
   * Number of inference steps (not described upstream). Default value: `3`
   */
  num_inference_steps?: number;
  /**
   * Output feedback loop. 1.0 = pure noise (no feedback), 0.9 = 90% noise + 10% previous output latent. Default value: `1`
   */
  output_feedback_strength?: number;
  /**
   * The prompt to guide image editing. Default value: `"Turn this into "Living oil painting, melting gold and sapphire""`
   */
  prompt?: string;
  /**
   * Schedule mu for time shift. 2.3=default, lower=more even denoising, 0.3=nearly linear. Default value: `2.3`
   */
  schedule_mu?: number;
  /**
   * Random seed for reproducibility. Default value: `35`
   */
  seed?: number;
};

/**
 * Result of a realtime edit request.
 */
export type RealtimeEditOutput = {
  /**
   * Generated images as raw bytes. When interpolation is enabled, returns [interpolated_frame, current_frame] in chronological order. Otherwise returns [current_frame].
   *
   * NOTE(review): the element type is not visible in this part of the file;
   * `unknown` only restores the missing type argument. TODO: narrow it.
   */
  images: Array<unknown>;
  /**
   * Seed used for generation.
   */
  seed: number;
};

/**
 * An image carried inline as `content` plus its `content_type`.
 * Neither field is described upstream.
 */
export type RealtimeImage = {
  content: string;
  content_type: string;
};

/**
 * Input for realtime background removal. Only `image_bytes` is required.
 */
export type RealtimeInput = {
  /**
   * Background color to use in the output image. Default value: `"white"`
   */
  background_color?: string;
  /**
   * Image bytes to remove background from.
   */
  image_bytes: string;
  /**
   * Model to use for background removal. Default value: `"General Use (Light)"`
   *
   * NOTE(review): the documented default is not one of the accepted literals
   * below — confirm which checkpoint it maps to.
   */
  model?:
    | "BiRefNet-DIS_ep580.pth"
    | "BiRefNet-portrait-TR_P3M_10k-epoch_120.pth"
    | "BiRefNet-massive-epoch_240.pth";
};

/**
 * Input for animating a single portrait image. Only `image_url` is required.
 */
export type RealtimeLivePortraitImageInput = {
  /**
   * Amount to open mouth in 'aaa' shape
   */
  aaa?: number;
  /**
   * Amount to blink the eyes
   */
  blink?: number;
  /**
   * Size of the output image. Default value: `512`
   */
  dsize?: number;
  /**
   * Amount to shape mouth in 'eee' position
   */
  eee?: number;
  /**
   * Whether to enable the safety checker.
If enabled, the model will check if the input image contains a face before processing it.
   * The safety checker will process the input image
   */
  enable_safety_checker?: boolean;
  /**
   * How far to raise or lower the eyebrows.
   */
  eyebrow?: number;
  /**
   * Crop the source portrait to the face-cropping space. Default value: `true`
   */
  flag_do_crop?: boolean;
  /**
   * Apply the rotation when flag_do_crop is True. Default value: `true`
   */
  flag_do_rot?: boolean;
  /**
   * Set the lip to a closed state before animation. Only takes effect when flag_eye_retargeting and flag_lip_retargeting are False. Default value: `true`
   */
  flag_lip_zero?: boolean;
  /**
   * Paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true`
   */
  flag_pasteback?: boolean;
  /**
   * URL of the image to be animated.
   */
  image_url: string | Blob | File;
  /**
   * Output format. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * Horizontal pupil movement amount.
   */
  pupil_x?: number;
  /**
   * Vertical pupil movement amount.
   */
  pupil_y?: number;
  /**
   * A unique identifier for the request. Default value: `"null"`
   */
  request_id?: string;
  /**
   * Face rotation amount in pitch.
   */
  rotate_pitch?: number;
  /**
   * Face rotation amount in roll.
   */
  rotate_roll?: number;
  /**
   * Face rotation amount in yaw.
   */
  rotate_yaw?: number;
  /**
   * Scaling factor for the face crop. Default value: `2.3`
   */
  scale?: number;
  /**
   * Amount to smile.
   */
  smile?: number;
  /**
   * Horizontal offset ratio for face crop.
   */
  vx_ratio?: number;
  /**
   * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125`
   */
  vy_ratio?: number;
  /**
   * Amount to wink.
   */
  wink?: number;
  /**
   * Amount to shape mouth in 'woo' position.
   */
  woo?: number;
};

/**
 * Result of a single-image live-portrait request.
 */
export type RealtimeLivePortraitImageOutput = {
  /**
   * The generated image file.
*/
  image: RealtimeImage;
  /**
   * Default value: `"null"`
   */
  request_id?: string;
};

/**
 * Input for the realtime live-portrait endpoint. `image_bytes` and
 * `initial_image_url` are the required fields.
 */
export type RealtimeLivePortraitInput = {
  /**
   * Amount to open mouth in 'aaa' shape.
   */
  aaa?: number;
  /**
   * Amount to blink the eyes.
   */
  blink?: number;
  /**
   * Size of the output image. Default value: `512`
   */
  dsize?: number;
  /**
   * Amount to shape mouth in 'eee' position.
   */
  eee?: number;
  /**
   * How far to raise or lower the eyebrows.
   */
  eyebrow?: number;
  /**
   * Crop the source portrait to the face-cropping space. Default value: `true`
   */
  flag_do_crop?: boolean;
  /**
   * Apply the rotation when flag_do_crop is True. Default value: `true`
   */
  flag_do_rot?: boolean;
  /**
   * Whether to enable eye retargeting.
   */
  flag_eye_retargeting?: boolean;
  /**
   * Whether to enable lip retargeting.
   */
  flag_lip_retargeting?: boolean;
  /**
   * Set the lip to a closed state before animation. Only takes effect when flag_eye_retargeting and flag_lip_retargeting are False. Default value: `true`
   */
  flag_lip_zero?: boolean;
  /**
   * Paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true`
   */
  flag_pasteback?: boolean;
  /**
   * Whether to use relative motion. Default value: `true`
   */
  flag_relative?: boolean;
  /**
   * Whether to enable stitching. Recommended to set to True. Default value: `true`
   */
  flag_stitching?: boolean;
  /** Not described upstream. */
  image_bytes: string;
  /** Not described upstream. */
  initial_image_url: string | Blob | File;
  /**
   * Horizontal pupil movement amount.
   */
  pupil_x?: number;
  /**
   * Vertical pupil movement amount.
   */
  pupil_y?: number;
  /**
   * Default value: `"null"`
   */
  r_d_0?: string;
  /**
   * Default value: `"null"`
   */
  request_id?: string;
  /** Not described upstream. */
  return_relative_info?: boolean;
  /**
   * Face rotation amount in pitch.
   */
  rotate_pitch?: number;
  /**
   * Face rotation amount in roll.
   */
  rotate_roll?: number;
  /**
   * Face rotation amount in yaw.
   */
  rotate_yaw?: number;
  /**
   * Scaling factor for the face crop.
Default value: `2.3` */
  scale?: number;
  /**
   * Amount to smile
   */
  smile?: number;
  /**
   * Horizontal offset ratio for face crop.
   */
  vx_ratio?: number;
  /**
   * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125`
   */
  vy_ratio?: number;
  /**
   * Amount to wink
   */
  wink?: number;
  /**
   * Amount to shape mouth in 'woo' position
   */
  woo?: number;
  /**
   * Default value: `"null"`
   */
  x_d_0_info?: string;
};

/**
 * Result of a realtime live-portrait request.
 */
export type RealtimeLivePortraitOutput = {
  /**
   * NOTE(review): not described upstream and the element type is not visible
   * in this part of the file; `unknown` only restores the missing type
   * argument. TODO: narrow it.
   */
  images: Array<unknown>;
  /**
   * Default value: `"null"`
   */
  r_d_0?: string;
  /**
   * Default value: `"null"`
   */
  request_id?: string;
  /**
   * Default value: `"null"`
   */
  x_d_0_info?: string;
};

/**
 * Result of a realtime image generation request.
 */
export type RealtimeOutput = {
  /**
   * Whether the generated images contain NSFW concepts.
   */
  has_nsfw_concepts: Array<boolean>;
  /**
   * The generated image files info.
   *
   * NOTE(review): element type assumed to be the file's `Image` type — confirm.
   */
  images: Array<Image>;
  /**
   * An id bound to a request, can be used with response to identify the request
   * itself. Default value: `""`
   */
  request_id?: string;
  /**
   * Seed of the generated Image. It will be the same value of the one passed in the
   * input or the randomly generated that was used in case none was passed.
   */
  seed: number;
  /** Not described upstream. */
  timings: unknown;
};

/**
 * Input for Recraft 20b image generation. Only `prompt` is required.
 */
export type Recraft20bInput = {
  /**
   * An array of preferable colors
   */
  colors?: Array<RGBColor>;
  /**
   * If set to true, the safety checker will be enabled.
   */
  enable_safety_checker?: boolean;
  /**
   * Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** Not described upstream. */
  prompt: string;
  /**
   * The style of the generated images. Vector images cost 2X as much.
Default value: `"realistic_image"` */
  style?:
    | "any"
    | "realistic_image"
    | "digital_illustration"
    | "vector_illustration"
    | "realistic_image/b_and_w"
    | "realistic_image/enterprise"
    | "realistic_image/hard_flash"
    | "realistic_image/hdr"
    | "realistic_image/motion_blur"
    | "realistic_image/natural_light"
    | "realistic_image/studio_portrait"
    | "digital_illustration/2d_art_poster"
    | "digital_illustration/2d_art_poster_2"
    | "digital_illustration/3d"
    | "digital_illustration/80s"
    | "digital_illustration/engraving_color"
    | "digital_illustration/glow"
    | "digital_illustration/grain"
    | "digital_illustration/hand_drawn"
    | "digital_illustration/hand_drawn_outline"
    | "digital_illustration/handmade_3d"
    | "digital_illustration/infantile_sketch"
    | "digital_illustration/kawaii"
    | "digital_illustration/pixel_art"
    | "digital_illustration/psychedelic"
    | "digital_illustration/seamless"
    | "digital_illustration/voxel"
    | "digital_illustration/watercolor"
    | "vector_illustration/cartoon"
    | "vector_illustration/doodle_line_art"
    | "vector_illustration/engraving"
    | "vector_illustration/flat_2"
    | "vector_illustration/kawaii"
    | "vector_illustration/line_art"
    | "vector_illustration/line_circuit"
    | "vector_illustration/linocut"
    | "vector_illustration/seamless";
  /**
   * The ID of the custom style reference (optional)
   */
  style_id?: string;
};

/**
 * Input for Recraft V3 image-to-image. `image_url` and `prompt` are required.
 */
export type RecraftV3ImageToImageInput = {
  /**
   * An array of preferable colors
   */
  colors?: Array<RGBColor>;
  /**
   * The URL of the image to modify. Must be less than 5 MB in size, have resolution less than 16 MP and max dimension less than 4096 pixels.
   */
  image_url: string | Blob | File;
  /**
   * A text description of undesired elements on an image
   */
  negative_prompt?: string;
  /**
   * A text description of areas to change.
   */
  prompt: string;
  /**
   * Defines the difference with the original image, should lie in [0, 1], where 0 means almost identical, and 1 means almost no similarity. Default value: `0.5`
   */
  strength?: number;
  /**
   * The style of the generated images.
Vector images cost 2X as much. Default value: `"realistic_image"`
   */
  style?:
    | "any" | "realistic_image" | "digital_illustration" | "vector_illustration"
    // realistic_image/*
    | "realistic_image/b_and_w" | "realistic_image/hard_flash" | "realistic_image/hdr"
    | "realistic_image/natural_light" | "realistic_image/studio_portrait" | "realistic_image/enterprise"
    | "realistic_image/motion_blur" | "realistic_image/evening_light" | "realistic_image/faded_nostalgia"
    | "realistic_image/forest_life" | "realistic_image/mystic_naturalism" | "realistic_image/natural_tones"
    | "realistic_image/organic_calm" | "realistic_image/real_life_glow" | "realistic_image/retro_realism"
    | "realistic_image/retro_snapshot" | "realistic_image/urban_drama" | "realistic_image/village_realism"
    | "realistic_image/warm_folk"
    // digital_illustration/* (continues on the following line of the file)
    | "digital_illustration/pixel_art" | "digital_illustration/hand_drawn" | "digital_illustration/grain"
    | "digital_illustration/infantile_sketch" | "digital_illustration/2d_art_poster" | "digital_illustration/handmade_3d"
    | "digital_illustration/hand_drawn_outline" | "digital_illustration/engraving_color" | "digital_illustration/2d_art_poster_2"
    | "digital_illustration/antiquarian" | "digital_illustration/bold_fantasy" | "digital_illustration/child_book"
    | "digital_illustration/child_books" | "digital_illustration/cover" | "digital_illustration/crosshatch"
    | "digital_illustration/digital_engraving" | "digital_illustration/expressionism" | "digital_illustration/freehand_details"
    | "digital_illustration/grain_20" | "digital_illustration/graphic_intensity" | "digital_illustration/hard_comics"
    | "digital_illustration/long_shadow" | "digital_illustration/modern_folk" | "digital_illustration/multicolor"
    | "digital_illustration/neon_calm" | "digital_illustration/noir" | "digital_illustration/nostalgic_pastel"
    | "digital_illustration/outline_details" | "digital_illustration/pastel_gradient" | "digital_illustration/pastel_sketch"
    | "digital_illustration/pop_art" | "digital_illustration/pop_renaissance" |
"digital_illustration/street_art"
    | "digital_illustration/tablet_sketch"
    | "digital_illustration/urban_glow"
    | "digital_illustration/urban_sketching"
    | "digital_illustration/vanilla_dreams"
    | "digital_illustration/young_adult_book"
    | "digital_illustration/young_adult_book_2"
    | "vector_illustration/bold_stroke"
    | "vector_illustration/chemistry"
    | "vector_illustration/colored_stencil"
    | "vector_illustration/contour_pop_art"
    | "vector_illustration/cosmics"
    | "vector_illustration/cutout"
    | "vector_illustration/depressive"
    | "vector_illustration/editorial"
    | "vector_illustration/emotional_flat"
    | "vector_illustration/infographical"
    | "vector_illustration/marker_outline"
    | "vector_illustration/mosaic"
    | "vector_illustration/naivector"
    | "vector_illustration/roundish_flat"
    | "vector_illustration/segmented_colors"
    | "vector_illustration/sharp_contrast"
    | "vector_illustration/thin"
    | "vector_illustration/vector_photo"
    | "vector_illustration/vivid_shapes"
    | "vector_illustration/engraving"
    | "vector_illustration/line_art"
    | "vector_illustration/line_circuit"
    | "vector_illustration/linocut";
  /**
   * The ID of the custom style reference (optional)
   */
  style_id?: string;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Input for Recraft V4 text-to-image. Only `prompt` is required.
 */
export type RecraftV4TextToImageInput = {
  /**
   * The preferable background color of the generated images.
   */
  background_color?: RGBColor;
  /**
   * An array of preferable colors
   */
  colors?: Array<RGBColor>;
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Default value: `square_hd`
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** Not described upstream. */
  prompt: string;
};

/**
 * Input for reference-to-video (Flash). Only `prompt` is required.
 */
export type ReferenceToVideoFlashInput = {
  /**
   * The aspect ratio of the generated video.
Default value: `"16:9"` */
  aspect_ratio?: "16:9" | "9:16" | "1:1" | "4:3" | "3:4";
  /**
   * Duration of the generated video in seconds. R2V Flash supports only 5 or 10 seconds. Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * Whether to generate a video with audio. Set to false for silent video generation. Silent videos are faster and cost 25% of the audio version price. Default value: `true`
   */
  enable_audio?: boolean;
  /**
   * Whether to enable prompt rewriting using LLM. Default value: `true`
   */
  enable_prompt_expansion?: boolean;
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * Reference images for subject consistency (0-5 images). Combined with video_urls, total references cannot exceed 5. Formats: JPEG, JPG, PNG (no alpha), BMP, WEBP. Resolution: 240-5000px. Max 10MB each. Reference order: image_urls continue numbering after video_urls.
   *
   * Elements use the same `string | Blob | File` union as the single-URL
   * fields in this file.
   */
  image_urls?: Array<string | Blob | File>;
  /**
   * When true (default), enables intelligent multi-shot segmentation for coherent narrative videos with multiple shots. When false, generates single continuous shot. Only active when enable_prompt_expansion is True. Default value: `true`
   */
  multi_shots?: boolean;
  /**
   * Negative prompt to describe content to avoid. Max 500 characters. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Use Character1, Character2, etc. to reference subjects from your reference files. Works for people, animals, or objects. For multi-shot prompts: '[0-3s] Shot 1. [3-6s] Shot 2.' Max 1500 characters. Reference order: video_urls first, then image_urls.
   */
  prompt: string;
  /**
   * Video resolution tier. R2V Flash only supports 720p and 1080p. Default value: `"1080p"`
   */
  resolution?: "720p" | "1080p";
  /**
   * Random seed for reproducibility. If None, a random seed is chosen.
   */
  seed?: number;
  /**
   * Reference videos for subject consistency (0-3 videos). Videos' FPS must be at least 16 FPS. Combined with image_urls, total references cannot exceed 5.
Reference order: video_urls are numbered first (Character1, Character2...), then image_urls continue the sequence.
   *
   * Elements use the same `string | Blob | File` union as the single-URL
   * fields in this file.
   */
  video_urls?: Array<string | Blob | File>;
};

/**
 * Input for reference-to-video. `prompt` and `video_urls` are required.
 */
export type ReferenceToVideoInput = {
  /**
   * The aspect ratio of the generated video. Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "1:1" | "4:3" | "3:4";
  /**
   * Duration of the generated video in seconds. R2V supports only 5 or 10 seconds (no 15s). Default value: `"5"`
   */
  duration?: "5" | "10";
  /**
   * Whether to enable prompt rewriting using LLM. Default value: `true`
   */
  enable_prompt_expansion?: boolean;
  /**
   * If set to true, the safety checker will be enabled. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * When true (default), enables intelligent multi-shot segmentation for coherent narrative videos with multiple shots. When false, generates single continuous shot. Only active when enable_prompt_expansion is True. Default value: `true`
   */
  multi_shots?: boolean;
  /**
   * Negative prompt to describe content to avoid. Max 500 characters. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Use @Video1, @Video2, @Video3 to reference subjects from your videos. Works for people, animals, or objects. For multi-shot prompts: '[0-3s] Shot 1. [3-6s] Shot 2.' Max 800 characters.
   */
  prompt: string;
  /**
   * Video resolution tier. R2V only supports 720p and 1080p (no 480p). Default value: `"1080p"`
   */
  resolution?: "720p" | "1080p";
  /**
   * Random seed for reproducibility. If None, a random seed is chosen.
   */
  seed?: number;
  /**
   * Reference videos for subject consistency (1-3 videos). Videos' FPS must be at least 16 FPS. Reference in prompt as @Video1, @Video2, @Video3. Works for people, animals, or objects.
   *
   * Elements use the same `string | Blob | File` union as the single-URL
   * fields in this file.
   */
  video_urls: Array<string | Blob | File>;
};

/**
 * Input for image refocusing. Only `image_url` is required.
 */
export type RefocusInput = {
  /**
   * Blur strength (K value). Higher values produce stronger bokeh/defocus. Set to 0 to only perform all-in-focus (deblur) estimation without bokeh. Default value: `8`
   */
  blur_strength?: number;
  /**
   * If set to true, the safety checker will be enabled.
Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The [x, y] coordinates of the focus point as fractions of image dimensions (0.0 to 1.0). For example, [0.5, 0.5] means center. If not provided, the center of the image is used.
   */
  focus_point?: Array<number>;
  /**
   * The URL of the image to refocus.
   */
  image_url: string | Blob | File;
  /**
   * The number of inference steps to perform. Lower values are faster but may reduce quality. Default value: `28`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The same seed and the same input given to the same version of the model will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
  /**
   * Resize the image so the longer side matches this value (in pixels). If not set, the original resolution is used (aligned to 16px). Recommended range: 512 to 2000. Default value: `512`
   */
  target_long_side?: number;
};

/**
 * Input for reframing an image to a new aspect ratio. Only `image_url` is required.
 */
export type ReframeInput = {
  /**
   * The desired aspect ratio for the reframed image. Default value: `"16:9"`
   */
  aspect_ratio?:
    | "21:9"
    | "16:9"
    | "4:3"
    | "3:2"
    | "1:1"
    | "2:3"
    | "3:4"
    | "9:16"
    | "9:21";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `3.5`
   */
  guidance_scale?: number;
  /**
   * URL of the old or damaged photo to restore.
   *
   * NOTE(review): this wording describes photo restoration, not reframing —
   * it looks copied from another endpoint; confirm against the API docs.
   */
  image_url: string | Blob | File;
  /**
   * Number of inference steps for sampling. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * The format of the generated image. Default value: `"jpeg"`
   */
  output_format?: "jpeg" | "png";
  /**
   * The safety tolerance level for the generated image. 1 being the most strict and 6 being the most permissive.
Default value: `"2"`
   */
  safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6";
  /**
   * The same seed and the same prompt given to the same version of the model will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * A rectangle given by its top-left (`x1`, `y1`) and bottom-right (`x2`, `y2`) corners.
 */
export type Region = {
  /** X-coordinate of the top-left corner */
  x1: number;
  /** X-coordinate of the bottom-right corner */
  x2: number;
  /** Y-coordinate of the top-left corner */
  y1: number;
  /** Y-coordinate of the bottom-right corner */
  y2: number;
};

/**
 * Image descriptor: `url`, pixel `width`/`height`, and an optional content type.
 */
export type RegistryImageFastSdxlModelsImage = {
  /**
   * Default value: `"image/jpeg"`
   */
  content_type?: string;
  /** Not described upstream. */
  height: number;
  /** Not described upstream. */
  url: string;
  /** Not described upstream. */
  width: number;
};

/**
 * Input for Reimagine 3.2. Only `prompt` is required; canny and depth control
 * images are optional.
 */
export type Reimagine32Input = {
  /**
   * Aspect ratio. Options: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9 Default value: `"1:1"`
   */
  aspect_ratio?:
    | "1:1"
    | "2:3"
    | "3:2"
    | "3:4"
    | "4:3"
    | "4:5"
    | "5:4"
    | "9:16"
    | "16:9";
  /**
   * Canny edge control image (file or URL). Default value: `""`
   */
  canny_image_url?: string | Blob | File;
  /**
   * Canny image preprocess. Default value: `true`
   */
  canny_preprocess?: boolean;
  /**
   * Canny edge control strength (0.0 to 1.0). Default value: `0.5`
   */
  canny_scale?: number;
  /**
   * Depth control image (file or URL). Default value: `""`
   */
  depth_image_url?: string | Blob | File;
  /**
   * Depth image preprocess. Default value: `true`
   */
  depth_preprocess?: boolean;
  /**
   * Depth control strength (0.0 to 1.0). Default value: `0.5`
   */
  depth_scale?: number;
  /**
   * Guidance scale for text. Default value: `5`
   */
  guidance_scale?: number;
  /**
   * Negative prompt for image generation.
Default value: `"Logo,Watermark,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,Missing legs,Too many fingers"`
   */
  negative_prompt?: string;
  /**
   * Number of inference steps. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * Prompt for image generation.
   */
  prompt: string;
  /**
   * Whether to improve the prompt. Default value: `true`
   */
  prompt_enhancer?: boolean;
  /**
   * Random seed for reproducibility. Default value: `5555`
   */
  seed?: number;
  /**
   * If true, returns the image directly in the response (increases latency).
   */
  sync_mode?: boolean;
  /**
   * Whether to truncate the prompt. Default value: `true`
   */
  truncate_prompt?: boolean;
};

/**
 * Input for Reimagine. Only `prompt` is required; a structure reference image
 * may be supplied to steer the result.
 */
export type ReimagineInput = {
  /**
   * Whether to use the fast model. Default value: `true`
   */
  fast?: boolean;
  /**
   * The number of iterations the model goes through to refine the generated image. This parameter is optional. Default value: `30`
   */
  num_inference_steps?: number;
  /**
   * How many images you would like to generate. When using any Guidance Method, Value is set to 1. Default value: `1`
   */
  num_results?: number;
  /**
   * The prompt you would like to use to generate images.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * The URL of the structure reference image. Use "" to leave empty. Accepted formats are jpeg, jpg, png, webp. Default value: `""`
   */
  structure_image_url?: string | Blob | File;
  /**
   * The influence of the structure reference on the generated image. Default value: `0.75`
   */
  structure_ref_influence?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
*/
  sync_mode?: boolean;
};

/**
 * Input for relighting an image with a named lighting style. Only `image_url` is required.
 */
export type RelightingInput = {
  /**
   * Aspect ratio for 4K output
   */
  aspect_ratio?: AspectRatio;
  /**
   * Image URL for relighting
   */
  image_url: string | Blob | File;
  /**
   * Default value: `"natural"`
   */
  lighting_style?:
    | "natural"
    | "studio"
    | "golden_hour"
    | "blue_hour"
    | "dramatic"
    | "soft"
    | "hard"
    | "backlight"
    | "side_light"
    | "front_light"
    | "rim_light"
    | "sunset"
    | "sunrise"
    | "neon"
    | "candlelight"
    | "moonlight"
    | "spotlight"
    | "ambient";
};

/**
 * Input for relighting by light direction and light type. All fields are required.
 */
export type RelightInput = {
  /**
   * The source image.
   */
  image_url: string | Blob | File;
  /**
   * Where the light comes from.
   */
  light_direction: "front" | "side" | "bottom" | "top-down";
  /**
   * The quality/style/time of day.
   */
  light_type:
    | "midday"
    | "blue hour light"
    | "low-angle sunlight"
    | "sunrise light"
    | "spotlight on subject"
    | "overcast light"
    | "soft overcast daylight lighting"
    | "cloud-filtered lighting"
    | "fog-diffused lighting"
    | "moonlight lighting"
    | "starlight nighttime"
    | "soft bokeh lighting"
    | "harsh studio lighting";
};

/**
 * Relighting parameters. Only `relight_prompt` is required.
 */
export type RelightParameters = {
  /**
   * Direction of the light source (used for IC-light). Default value: `"Left"`
   */
  bg_source?: "Left" | "Right" | "Top" | "Bottom";
  /**
   * Classifier-free guidance scale for relighting. Default value: `2`
   */
  cfg?: number;
  /**
   * Text prompt describing the desired lighting condition.
   */
  relight_prompt: string;
  /**
   * Whether to use sky masking for outdoor scenes.
   */
  use_sky_mask?: boolean;
};

/**
 * Input for remeshing a 3D object file. Only `object_url` is required.
 */
export type RemeshingInput = {
  /**
   * Number of faces for remesh. Default value: `5000`
   */
  faces?: number;
  /**
   * Merge duplicate vertices before exporting. Default value: `true`
   */
  merge?: boolean;
  /**
   * Path for the object file to be remeshed.
   */
  object_url: string | Blob | File;
  /**
   * Output format for the 3D model.
Default value: `"glb"`
   */
  output_format?: "glb" | "fbx" | "obj" | "stl" | "usdc";
  /**
   * Preserve UVs during remeshing. Default value: `true`
   */
  preserve_uvs?: boolean;
};

/**
 * Input for remeshing a 3D model. Only `model_url` is required.
 */
export type RemeshInput = {
  /**
   * URL or base64 data URI of a 3D model to remesh. Supports .glb, .gltf, .obj, .fbx, .stl formats. Can be a publicly accessible URL or data URI with MIME type application/octet-stream.
   */
  model_url: string | Blob | File;
  /**
   * Position of the origin. None means no effect.
   */
  origin_at?: "bottom" | "center";
  /**
   * Resize the model to a certain height measured in meters. Set to 0 for no resizing.
   */
  resize_height?: number;
  /**
   * List of target formats for the remeshed model.
   */
  target_formats?: Array<"glb" | "fbx" | "obj" | "usdz" | "blend" | "stl">;
  /**
   * Target number of polygons in the generated model. Actual count may vary based on geometry complexity. Default value: `30000`
   */
  target_polycount?: number;
  /**
   * Specify the topology of the generated model. Quad for smooth surfaces, Triangle for detailed geometry. Default value: `"triangle"`
   */
  topology?: "quad" | "triangle";
};

/**
 * Result of a remesh request.
 */
export type RemeshOutput = {
  /**
   * Remeshed 3D object in GLB format (if GLB was requested).
   */
  model_glb?: File;
  /**
   * URLs for different 3D model formats
   */
  model_urls: ModelUrls;
};

/**
 * Input for remixing an image with a prompt. `image_url` and `prompt` are required.
 */
export type RemixImageInput = {
  /**
   * The aspect ratio of the generated image. Default value: `"1:1"`
   */
  aspect_ratio?:
    | "10:16"
    | "16:10"
    | "9:16"
    | "16:9"
    | "4:3"
    | "3:4"
    | "1:1"
    | "1:3"
    | "3:1"
    | "3:2"
    | "2:3";
  /**
   * Whether to expand the prompt with MagicPrompt functionality.
Default value: `true`
   */
  expand_prompt?: boolean;
  /**
   * The image URL to remix
   */
  image_url: string | Blob | File;
  /**
   * The prompt to remix the image with
   */
  prompt: string;
  /**
   * Seed for the random number generator
   */
  seed?: number;
  /**
   * Strength of the input image in the remix. Default value: `0.8`
   */
  strength?: number;
  /**
   * The style of the generated image. Default value: `"auto"`
   */
  style?: "auto" | "general" | "realistic" | "design" | "render_3D" | "anime";
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
   */
  sync_mode?: boolean;
};

/**
 * Input for remixing a previously generated video. `prompt` and `video_id` are required.
 */
export type RemixInput = {
  /**
   * Whether to delete the video after generation for privacy reasons. If True, the video cannot be used for remixing and will be permanently deleted. Default value: `true`
   */
  delete_video?: boolean;
  /**
   * Updated text prompt that directs the remix generation
   */
  prompt: string;
  /**
   * The video_id from a previous Sora 2 generation. Note: You can only remix videos that were generated by Sora (via text-to-video or image-to-video endpoints), not arbitrary uploaded videos.
   */
  video_id: string;
};

/**
 * Result of a video remix request.
 */
export type RemixOutput = {
  /**
   * Spritesheet image for the video
   */
  spritesheet?: ImageFile;
  /**
   * Thumbnail image for the video
   */
  thumbnail?: ImageFile;
  /**
   * The generated video
   */
  video: VideoFile;
  /**
   * The ID of the generated video
   */
  video_id: string;
};

/**
 * Input for removing lighting/shadows from images.
 */
export type RemoveLightingInput = {
  /**
   * Acceleration level for image generation. 'regular' balances speed and quality. Default value: `"regular"`
   */
  acceleration?: "none" | "regular";
  /**
   * Whether to enable the safety checker for the generated image. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale. Controls how closely the model follows the prompt. Default value: `1`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. If not provided, the size of the final input image will be used.
*/
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The URL of the image with lighting/shadows to remove.
   *
   * Elements use the same `string | Blob | File` union as the single-URL
   * fields in this file.
   */
  image_urls: Array<string | Blob | File>;
  /**
   * The negative prompt for the generation. Default value: `" "`
   */
  negative_prompt?: string;
  /**
   * Number of images to generate. Default value: `1`
   */
  num_images?: number;
  /**
   * The number of inference steps to perform. Default value: `6`
   */
  num_inference_steps?: number;
  /**
   * The format of the output image. Default value: `"png"`
   */
  output_format?: "png" | "jpeg" | "webp";
  /**
   * Random seed for reproducibility. Same seed with same prompt will produce same result.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and won't be saved in history.
   */
  sync_mode?: boolean;
};

/**
 * Input for background replacement. Every field is optional.
 */
export type ReplaceBackgroundInput = {
  /**
   * Reference image (file or URL). Default value: `"https://v3b.fal.media/files/b/0a8bea8c/Mztgx0NG3HPdby-4iPqwH_a_coffee_machine_standing_in_the_kitchen.png"`
   */
  image_url?: string | Blob | File;
  /**
   * Negative prompt for background replacement. Default value: `""`
   */
  negative_prompt?: string;
  /**
   * Prompt for background replacement.
   */
  prompt?: string;
  /**
   * Random seed for reproducibility. Default value: `4925634`
   */
  seed?: number;
  /**
   * Number of inference steps. Default value: `30`
   */
  steps_num?: number;
  /**
   * If true, returns the image directly in the response (increases latency).
   */
  sync_mode?: boolean;
};

/**
 * Result of a background replacement request.
 */
export type ReplaceBackgroundOutput = {
  /**
   * Generated image.
   */
  image: Image;
  /**
   * Generated images.
   */
  images?: Array<Image>;
};

/**
 * Input for changing the season of an image. Both fields are required.
 */
export type ReseasonInput = {
  /**
   * The source image.
   */
  image_url: string | Blob | File;
  /**
   * The desired season.
*/
  season: "spring" | "summer" | "autumn" | "winter";
};

/**
 * Video resolution: pixel dimensions plus the display aspect ratio.
 */
export type Resolution = {
  /** Display aspect ratio (e.g., '16:9') */
  aspect_ratio: string;
  /** Height of the video in pixels */
  height: number;
  /** Width of the video in pixels */
  width: number;
};

/**
 * Input for restyling an image. Both fields are required.
 */
export type RestyletInput = {
  /**
   * The source image.
   */
  image_url: string | Blob | File;
  /**
   * Select the desired artistic style for the output image.
   */
  style:
    | "3D Render"
    | "Cubism"
    | "Oil Painting"
    | "Anime"
    | "Cartoon"
    | "Coloring Book"
    | "Retro Ad"
    | "Pop Art Halftone"
    | "Vector Art"
    | "Story Board"
    | "Art Nouveau"
    | "Cross Etching"
    | "Wood Cut";
};

/**
 * Input for retexturing a 3D model. Only `model_url` is required by the type;
 * the style is given via `image_style_url` or `text_style_prompt`.
 */
export type RetextureInput = {
  /**
   * Use the original UV mapping of the model instead of generating new UVs. If the model has no original UV, output quality may be reduced. Default value: `true`
   */
  enable_original_uv?: boolean;
  /**
   * Generate PBR Maps (metallic, roughness, normal) in addition to base color.
   */
  enable_pbr?: boolean;
  /**
   * If set to true, input data will be checked for safety before processing. Default value: `true`
   */
  enable_safety_checker?: boolean;
  /**
   * 2D image to guide the texturing process. Supports .jpg, .jpeg, and .png formats. Required if text_style_prompt is not provided. If both are provided, image_style_url takes precedence.
   */
  image_style_url?: string | Blob | File;
  /**
   * URL or base64 data URI of a 3D model to texture. Supports .glb, .gltf, .obj, .fbx, .stl formats. Can be a publicly accessible URL or data URI with MIME type application/octet-stream.
   */
  model_url: string | Blob | File;
  /**
   * Describe your desired texture style using text. Maximum 600 characters. Required if image_style_url is not provided.
   */
  text_style_prompt?: string;
};

/**
 * Result of a retexture request.
 */
export type RetextureOutput = {
  /**
   * The image URL used for texturing (if provided)
   */
  image_style_url?: string | Blob | File;
  /**
   * Retextured 3D object in GLB format.
*/ model_glb: File; /** * URLs for different 3D model formats */ model_urls: ModelUrls; /** * The text prompt used for texturing (if provided) */ text_style_prompt?: string; /** * Array of texture file objects */ texture_urls?: Array; /** * Preview thumbnail of the retextured model */ thumbnail?: File; }; export type RewriteTextInput = { /** * The source image. */ image_url: string | Blob | File; /** * The new text string to appear in the image. */ new_text: string; }; export type RFInversionInput = { /** * Base shift for the scheduled timesteps Default value: `0.5` */ base_shift?: number; /** * The LoRAs to use for the image generation which use a control image. You can use any number of LoRAs * and they will be merged together to generate the final image. */ control_loras?: Array; /** * The controller guidance (gamma) used in the creation of structured noise. Default value: `0.6` */ controller_guidance_forward?: number; /** * The controller guidance (eta) used in the denoising process.Using values closer to 1 will result in an image closer to input. Default value: `0.75` */ controller_guidance_reverse?: number; /** * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. */ controlnet_unions?: Array; /** * The controlnets to use for the image generation. Only one controlnet is supported at the moment. */ controlnets?: Array; /** * EasyControl Inputs to use for image generation. */ easycontrols?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Use an image input to influence the generation. Can be used to fill images in masked areas. */ fill_image?: ImageFillInput; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. 
*/ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * URL of image to be edited */ image_url: string | Blob | File; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. */ loras?: Array; /** * Max shift for the scheduled timesteps Default value: `1.15` */ max_shift?: number; /** * The alpha value for NAG. This value is used as a final weighting * factor for steering the normalized guidance (positive and negative prompts) * in the direction of the positive prompt. Higher values will result in less * steering on the normalized guidance where lower values will result in * considering the positive prompt guidance more. Default value: `0.25` */ nag_alpha?: number; /** * The proportion of steps to apply NAG. After the specified proportion * of steps has been iterated, the remaining steps will use original * attention processors in FLUX. Default value: `0.25` */ nag_end?: number; /** * The scale for NAG. Higher values will result in a image that is more distant * to the negative prompt. Default value: `3` */ nag_scale?: number; /** * The tau for NAG. Controls the normalization of the hidden state. * Higher values will result in a less aggressive normalization, * but may also lead to unexpected changes with respect to the original image. * Not recommended to change this value. Default value: `2.5` */ nag_tau?: number; /** * Negative prompt to steer the image generation away from unwanted features. * By default, we will be using NAG for processing the negative prompt. Default value: `""` */ negative_prompt?: string; /** * The number of images to generate. This is always set to 1 for streaming output. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. 
Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * The prompt to edit the image with */ prompt: string; /** * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ reference_end?: number; /** * URL of Image for Reference-Only */ reference_image_url?: string | Blob | File; /** * The percentage of the total timesteps when the reference guidance is to be started. */ reference_start?: number; /** * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ reference_strength?: number; /** * Timestep to stop guidance during reverse process. Default value: `8` */ reverse_guidance_end?: number; /** * Scheduler for applying reverse guidance. Default value: `"constant"` */ reverse_guidance_schedule?: "constant" | "linear_increase" | "linear_decrease"; /** * Timestep to start guidance during reverse process. */ reverse_guidance_start?: number; /** * Scheduler for the denoising process. Default value: `"euler"` */ scheduler?: "euler" | "dpmpp_2m"; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * Sigmas schedule for the denoising process. */ sigma_schedule?: string; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * Specifies whether beta sigmas ought to be used. */ use_beta_schedule?: boolean; /** * Uses CFG-zero init sampling as in https://arxiv.org/abs/2503.18886. */ use_cfg_zero?: boolean; }; export type RGBColor = { /** * Blue color value */ b?: number; /** * Green color value */ g?: number; /** * Red color value */ r?: number; }; export type rifeInput = { /** * The URL of the second image to use as the ending point for interpolation. */ end_image_url: string | Blob | File; /** * Frames per second for the output video. Only applicable if output_type is 'video'. 
Default value: `8` */ fps?: number; /** * Whether to include the end image in the output. */ include_end?: boolean; /** * Whether to include the start image in the output. */ include_start?: boolean; /** * The number of frames to generate between the input images. Default value: `1` */ num_frames?: number; /** * The format of the output images. Only applicable if output_type is 'images'. Default value: `"jpeg"` */ output_format?: "png" | "jpeg"; /** * The type of output to generate; either individual images or a video. Default value: `"images"` */ output_type?: "images" | "video"; /** * The URL of the first image to use as the starting point for interpolation. */ start_image_url: string | Blob | File; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type rifeOutput = { /** * The generated frames as individual images. */ images?: Array; /** * The generated video file, if output_type is 'video'. */ video?: File; }; export type RifeVideoInput = { /** * Frames per second for the output video. Only applicable if use_calculated_fps is False. Default value: `8` */ fps?: number; /** * If True, the final frame will be looped back to the first frame to create a seamless loop. If False, the final frame will not loop back. */ loop?: boolean; /** * The number of frames to generate between the input video frames. Default value: `1` */ num_frames?: number; /** * If True, the function will use the calculated FPS of the input video multiplied by the number of frames to determine the output FPS. If False, the passed FPS will be used. Default value: `true` */ use_calculated_fps?: boolean; /** * If True, the input video will be split into scenes before interpolation. This removes smear frames between scenes, but can result in false positives if the scene detection is not accurate. If False, the entire video will be treated as a single scene. 
*/ use_scene_detection?: boolean; /** * The URL of the video to use for interpolation. */ video_url: string | Blob | File; }; export type Rodin3DInput = { /** * Generation add-on features. Default is []. Possible values are HighPack. The HighPack option will provide 4K resolution textures instead of the default 1K, as well as models with high-poly. It will cost triple the billable units. */ addons?: string; /** * An array that specifies the dimensions and scaling factor of the bounding box. Typically, this array contains 3 elements, Length(X-axis), Width(Y-axis) and Height(Z-axis). */ bbox_condition?: Array<number>; /** * For fuse mode, one or more images are required. It will generate a model by extracting and fusing features of objects from multiple images. For concat mode, need to upload multiple multi-view images of the same object and generate the model. (You can upload multi-view images in any order, regardless of the order of view.) Default value: `"concat"` */ condition_mode?: "fuse" | "concat"; /** * Format of the geometry file. Possible values: glb, usdz, fbx, obj, stl. Default is glb. Default value: `"glb"` */ geometry_file_format?: "glb" | "usdz" | "fbx" | "obj" | "stl"; /** * URL of images to use while generating the 3D model. Required for Image-to-3D mode. Optional for Text-to-3D mode. */ input_image_urls?: Array<string>; /** * Material type. Possible values: PBR, Shaded. Default is PBR. Default value: `"PBR"` */ material?: "PBR" | "Shaded"; /** * A textual prompt to guide model generation. Required for Text-to-3D mode. Optional for Image-to-3D mode. Default value: `""` */ prompt?: string; /** * Generation quality. Possible values: high, medium, low, extra-low. Default is medium. Default value: `"medium"` */ quality?: "high" | "medium" | "low" | "extra-low"; /** * Seed value for randomization, ranging from 0 to 65535. Optional. */ seed?: number; /** * When generating the human-like model, this parameter control the generation result to T/A Pose. 
*/ TAPose?: boolean; /** * Tier of generation. For Rodin Sketch, set to Sketch. For Rodin Regular, set to Regular. Default value: `"Regular"` */ tier?: "Regular" | "Sketch"; /** * Whether to export the model using hyper mode. Default is false. */ use_hyper?: boolean; }; export type RodinGen2Input = { /** * The HighPack option will provide 4K resolution textures instead of the default 1K, as well as models with high-poly. It will cost **triple the billable units**. */ addons?: string; /** * An array that specifies the bounding box dimensions [width, height, length]. */ bbox_condition?: Array<number>; /** * Format of the geometry file. Possible values: glb, usdz, fbx, obj, stl. Default is glb. Default value: `"glb"` */ geometry_file_format?: "glb" | "usdz" | "fbx" | "obj" | "stl"; /** * URL of images to use while generating the 3D model. Required for Image-to-3D mode. Up to 5 images allowed. */ input_image_urls?: Array<string>; /** * Material type. PBR: Physically-based materials with realistic lighting. Shaded: Simple materials with baked lighting. All: Both types included. Default value: `"All"` */ material?: "PBR" | "Shaded" | "All"; /** * Generate a preview render image of the 3D model along with the model files. */ preview_render?: boolean; /** * A textual prompt to guide model generation. Optional for Image-to-3D mode - if empty, AI will generate a prompt based on your images. Default value: `""` */ prompt?: string; /** * Combined quality and mesh type selection. Quad = smooth surfaces, Triangle = detailed geometry. These correspond to `mesh_mode` (if the option contains 'Triangle', mesh_mode is 'Raw', otherwise 'Quad') and `quality_override` (the numeric part of the option) parameters in Hyper3D API. Default value: `"500K Triangle"` */ quality_mesh_option?: "4K Quad" | "8K Quad" | "18K Quad" | "50K Quad" | "2K Triangle" | "20K Triangle" | "150K Triangle" | "500K Triangle"; /** * Seed value for randomization, ranging from 0 to 65535. Optional. 
*/ seed?: number; /** * Generate characters in T-pose or A-pose format, making them easier to rig and animate in 3D software. */ TAPose?: boolean; /** * When enabled, preserves the transparency channel from input images during 3D generation. */ use_original_alpha?: boolean; }; export type RodinGen2TextTo3DInput = { /** * The HighPack option will provide 4K resolution textures instead of the default 1K, as well as models with high-poly. It will cost **triple the billable units**. */ addons?: string; /** * An array that specifies the bounding box dimensions [width, height, length]. */ bbox_condition?: Array<number>; /** * Format of the geometry file. Possible values: glb, usdz, fbx, obj, stl. Default is glb. Default value: `"glb"` */ geometry_file_format?: "glb" | "usdz" | "fbx" | "obj" | "stl"; /** * Material type. PBR: Physically-based materials with realistic lighting. Shaded: Simple materials with baked lighting. All: Both types included. Default value: `"All"` */ material?: "PBR" | "Shaded" | "All"; /** * A textual prompt to guide model generation. Required for Text-to-3D mode. */ prompt: string; /** * Combined quality and mesh type selection. Quad = smooth surfaces, Triangle = detailed geometry. Default value: `"18K Quad"` */ quality_mesh_option?: "4K Quad" | "8K Quad" | "18K Quad" | "50K Quad" | "2K Triangle" | "20K Triangle" | "150K Triangle" | "500K Triangle"; /** * Seed value for randomization, ranging from 0 to 65535. Optional. */ seed?: number; /** * Generate characters in T-pose or A-pose format, making them easier to rig and animate in 3D software. 
*/ TAPose?: boolean; }; export type routerOutput = { /** * Error message if an error occurred */ error?: string; /** * Generated output */ output: string; /** * Whether the output is partial */ partial?: boolean; /** * Generated reasoning for the final answer */ reasoning?: string; /** * Token usage information */ usage?: UsageInfo; }; export type RouterVideoInput = { /** * This sets the upper limit for the number of tokens the model can generate in response. It won't produce more than this limit. The maximum value is the context length minus the prompt length. */ max_tokens?: number; /** * Name of the model to use. Charged based on actual token usage. */ model: string; /** * Prompt to be used for the video processing */ prompt: string; /** * Should reasoning be the part of the final answer. */ reasoning?: boolean; /** * System prompt to provide context or instructions to the model */ system_prompt?: string; /** * This setting influences the variety in the model's responses. Lower values lead to more predictable and typical responses, while higher values encourage more diverse and less common responses. At 0, the model always gives the same response for a given input. Default value: `1` */ temperature?: number; /** * List of URLs or data URIs of video files to process. Supported formats: mp4, mpeg, mov, webm. For Google Gemini on AI Studio, YouTube links are also supported. Mutually exclusive with video_url. */ video_urls?: Array<string>; }; export type RouterVisionInput = { /** * List of image URLs to be processed */ image_urls: Array<string>; /** * This sets the upper limit for the number of tokens the model can generate in response. It won't produce more than this limit. The maximum value is the context length minus the prompt length. */ max_tokens?: number; /** * Name of the model to use. Charged based on actual token usage. */ model: string; /** * Prompt to be used for the image */ prompt: string; /** * Should reasoning be the part of the final answer. 
*/ reasoning?: boolean; /** * System prompt to provide context or instructions to the model */ system_prompt?: string; /** * This setting influences the variety in the model's responses. Lower values lead to more predictable and typical responses, while higher values encourage more diverse and less common responses. At 0, the model always gives the same response for a given input. Default value: `1` */ temperature?: number; }; export type RundiffusionPhotoFluxInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** * The size of the generated image. Default value: `landscape_4_3` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * The LoRAs to use for the image generation. You can use any number of LoRAs * and they will be merged together to generate the final image. */ loras?: Array; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The format of the generated image. Default value: `"jpeg"` */ output_format?: "jpeg" | "png"; /** * LoRA Scale of the photo lora model Default value: `0.75` */ photo_lora_scale?: number; /** * The prompt to generate an image from. */ prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but * it allows you to get the image directly in the response without going through the CDN. 
*/ sync_mode?: boolean; }; export type Sa2va4bVideoInput = { /** * Number of frames to sample from the video. If not provided, all frames are sampled. */ num_frames_to_sample?: number; /** * Prompt to be used for the chat completion */ prompt: string; /** * The URL of the input video. */ video_url: string | Blob | File; }; export type sadtalkerInput = { /** * URL of the driven audio */ driven_audio_url: string | Blob | File; /** * The scale of the expression Default value: `1` */ expression_scale?: number; /** * The type of face enhancer to use */ face_enhancer?: "gfpgan"; /** * The resolution of the face model Default value: `"256"` */ face_model_resolution?: "256" | "512"; /** * The style of the pose */ pose_style?: number; /** * The type of preprocessing to use Default value: `"crop"` */ preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull"; /** * URL of the source image */ source_image_url: string | Blob | File; /** * Whether to use still mode. Fewer head motion, works with preprocess `full`. */ still_mode?: boolean; }; export type SadTalkerRefVideoInput = { /** * URL of the driven audio */ driven_audio_url: string | Blob | File; /** * The scale of the expression Default value: `1` */ expression_scale?: number; /** * The type of face enhancer to use */ face_enhancer?: "gfpgan"; /** * The resolution of the face model Default value: `"256"` */ face_model_resolution?: "256" | "512"; /** * The style of the pose */ pose_style?: number; /** * The type of preprocessing to use Default value: `"crop"` */ preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull"; /** * URL of the reference video */ reference_pose_video_url: string | Blob | File; /** * URL of the source image */ source_image_url: string | Blob | File; /** * Whether to use still mode. Fewer head motion, works with preprocess `full`. 
*/ still_mode?: boolean; }; export type Sam2AutoSegmentInput = { /** * URL of the image to be automatically segmented */ image_url: string | Blob | File; /** * Minimum area of a mask region. Default value: `100` */ min_mask_region_area?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png"; /** * Number of points to sample along each side of the image. Default value: `32` */ points_per_side?: number; /** * Threshold for predicted IOU score. Default value: `0.88` */ pred_iou_thresh?: number; /** * Threshold for stability score. Default value: `0.95` */ stability_score_thresh?: number; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type Sam2AutoSegmentOutput = { /** * Combined segmentation mask. */ combined_mask: Image; /** * Individual segmentation masks. */ individual_masks: Array<Image>; }; export type SAM2EmbeddingOutput = { /** * Embedding of the image */ embedding_b64: string; }; export type Sam2ImageInput = { /** * Apply the mask on the image. */ apply_mask?: boolean; /** * Coordinates for boxes. NOTE(review): element type argument is missing here; confirm the intended box prompt type. */ box_prompts?: Array; /** * URL of the image to be segmented */ image_url: string | Blob | File; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * List of prompts to segment the image. NOTE(review): element type argument is missing here; confirm the intended prompt type. */ prompts?: Array; /** * If `True`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; export type SAM2RLEFileOutput = { /** * Zip file containing per-frame bounding box overlays. */ boundingbox_frames_zip?: File; /** * Run Length Encoding of the mask. */ rle: File; }; export type SAM2RLEOutput = { /** * Zip file containing per-frame bounding box overlays. */ boundingbox_frames_zip?: File; /** * Run Length Encoding of the mask. 
*/ rle: string | Array; }; export type Sam2VideoInput = { /** * Apply the mask on the video. */ apply_mask?: boolean; /** * Return per-frame bounding box overlays as a zip archive. */ boundingbox_zip?: boolean; /** * Coordinates for boxes */ box_prompts?: Array; /** * The URL of the mask to be applied initially. */ mask_url?: string | Blob | File; /** * List of prompts to segment the video */ prompts?: Array; /** * The URL of the video to be segmented. */ video_url: string | Blob | File; }; export type Sam2VideoOutput = { /** * Zip file containing per-frame bounding box overlays. */ boundingbox_frames_zip?: File; /** * The segmented video. */ video: File; }; export type Sam31VideoInput = { /** * Apply the mask on the video. Default value: `true` */ apply_mask?: boolean; /** * List of box prompt coordinates (x_min, y_min, x_max, y_max). */ box_prompts?: Array; /** * Detection confidence threshold (0.0-1.0). Lower = more detections but less precise. Default value: `0.5` */ detection_threshold?: number; /** * Maximum number of objects to track in the video. Default value: `16` */ max_num_objects?: number; /** * List of point prompts */ point_prompts?: Array; /** * Text prompt for segmentation. Use commas to track multiple objects (e.g., 'person, cloth'). Default value: `""` */ prompt?: string; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)"; /** * The URL of the video to be segmented. */ video_url: string | Blob | File; }; export type Sam31VideoRleInput = { /** * Apply the mask on the video. */ apply_mask?: boolean; /** * Return per-frame bounding box overlays as a zip archive. */ boundingbox_zip?: boolean; /** * List of box prompts with optional frame_index. */ box_prompts?: Array; /** * Detection confidence threshold (0.0-1.0). Lower = more detections but less precise. Defaults: 0.5 for existing, 0.7 for new objects. Try 0.2-0.3 if text prompts fail. 
Default value: `0.5` */ detection_threshold?: number; /** * Frame index used for initial interaction when mask_url is provided. */ frame_index?: number; /** * The URL of the mask to be applied initially. */ mask_url?: string | Blob | File; /** * Maximum number of objects to track in the video. Default value: `16` */ max_num_objects?: number; /** * List of point prompts with frame indices. */ point_prompts?: Array; /** * Text prompt for segmentation. Use commas to track multiple objects (e.g., 'person, cloth'). Default value: `""` */ prompt?: string; /** * The URL of the video to be segmented. */ video_url: string | Blob | File; }; export type Sam33dAlignInput = { /** * URL of the human mask image. If not provided, uses full image. */ body_mask_url?: string | Blob | File; /** * URL of the SAM-3D Body mesh file (.ply or .glb) to align */ body_mesh_url: string | Blob | File; /** * Focal length from SAM-3D Body metadata. If not provided, estimated from MoGe. */ focal_length?: number; /** * URL of the original image used for MoGe depth estimation */ image_url: string | Blob | File; /** * Optional URL of SAM-3D Object mesh (.glb) to create combined scene */ object_mesh_url?: string | Blob | File; }; export type Sam33dAlignOutput = { /** * Aligned body mesh in PLY format */ body_mesh_ply: File; /** * Alignment info (scale, translation, etc.) 
*/ metadata: SAM3DBodyAlignmentInfo; /** * Aligned body mesh in GLB format (for 3D preview) */ model_glb: File; /** * Combined scene with body + object meshes in GLB format (only when object_mesh_url provided) */ scene_glb?: File; /** * Visualization of aligned mesh overlaid on input image */ visualization: File; }; export type SAM3DBodyAlignmentInfo = { /** * Number of cropped vertices */ cropped_vertices_count: number; /** * Focal length used */ focal_length: number; /** * Index of the person */ person_id: number; /** * Scale factor applied for alignment */ scale_factor: number; /** * Number of target points for alignment */ target_points_count: number; /** * Translation [tx, ty, tz] */ translation: Array<number>; }; export type SAM3DBodyInput = { /** * Export individual mesh files (.ply) per person Default value: `true` */ export_meshes?: boolean; /** * URL of the image containing humans */ image_url: string | Blob | File; /** * Include 3D keypoint markers (spheres) in the GLB mesh for visualization Default value: `true` */ include_3d_keypoints?: boolean; /** * Optional URL of a binary mask image (white=person, black=background). When provided, skips auto human detection and uses this mask instead. Bbox is auto-computed from the mask. 
*/ mask_url?: string | Blob | File; }; export type SAM3DBodyMetadata = { /** * Number of people detected */ num_people: number; /** * Per-person metadata */ people: Array<SAM3DBodyPersonMetadata>; }; export type SAM3DBodyOutput = { /** * Individual mesh files (.ply), one per detected person (when export_meshes=True) */ meshes?: Array<File>; /** * Structured metadata including keypoints and camera parameters */ metadata: SAM3DBodyMetadata; /** * 3D body mesh in GLB format with optional 3D keypoint markers */ model_glb: File; /** * Combined visualization image (original + keypoints + mesh + side view) */ visualization: File; }; export type SAM3DBodyPersonMetadata = { /** * Bounding box [x_min, y_min, x_max, y_max] */ bbox: Array<number>; /** * Estimated focal length */ focal_length: number; /** * 2D keypoints [[x, y], ...] - 70 body keypoints */ keypoints_2d: Array<Array<number>>; /** * 3D keypoints [[x, y, z], ...] - 70 body keypoints in camera space */ keypoints_3d?: Array<Array<number>>; /** * Index of the person in the scene */ person_id: number; /** * Predicted camera translation [tx, ty, tz] */ pred_cam_t: Array<number>; }; export type SAM3DObjectInput = { /** * Box prompts for auto-segmentation when no masks provided. Multiple boxes supported - each produces a separate object mask for 3D reconstruction. NOTE(review): element type argument is missing here; confirm the intended box prompt type. */ box_prompts?: Array; /** * Detection confidence threshold (0.1-1.0). Lower = more detections but less precise. If not set, uses the model's default. */ detection_threshold?: number; /** * If True, exports GLB with baked texture and UVs instead of vertex colors. */ export_textured_glb?: boolean; /** * URL of the image to reconstruct in 3D */ image_url: string | Blob | File; /** * Optional list of mask URLs (one per object). If not provided, use prompt/point_prompts/box_prompts to auto-segment, or entire image will be used. 
*/ mask_urls?: Array<string>; /** * Point prompts for auto-segmentation when no masks provided. NOTE(review): element type argument is missing here; confirm the intended point prompt type. */ point_prompts?: Array; /** * Optional URL to external pointmap/depth data (NPY or NPZ format) for improved 3D reconstruction depth estimation */ pointmap_url?: string | Blob | File; /** * Text prompt for auto-segmentation when no masks provided (e.g., 'chair', 'lamp') Default value: `"car"` */ prompt?: string; /** * Random seed for reproducibility */ seed?: number; }; export type SAM3DObjectMetadata = { /** * Camera pose matrix */ camera_pose?: Array<Array<number>>; /** * Index of the object in the scene */ object_index: number; /** * Rotation quaternion [x, y, z, w] */ rotation?: Array<Array<number>>; /** * Scale factors [sx, sy, sz] */ scale?: Array<Array<number>>; /** * Translation [tx, ty, tz] */ translation?: Array<Array<number>>; }; export type SAM3DObjectOutput = { /** * Zip bundle containing all artifacts and metadata */ artifacts_zip?: File; /** * Gaussian splat file (.ply) - combined scene splat for multi-object, single splat otherwise */ gaussian_splat: File; /** * Individual GLB mesh files per object (only for multi-object scenes) */ individual_glbs?: Array<File>; /** * Individual Gaussian splat files per object (only for multi-object scenes) */ individual_splats?: Array<File>; /** * Per-object metadata (rotation/translation/scale) */ metadata: Array<SAM3DObjectMetadata>; /** * 3D mesh in GLB format - combined scene for multi-object, single mesh otherwise */ model_glb?: File; }; export type Sam3ImageInput = { /** * Apply the mask on the image. Default value: `true` */ apply_mask?: boolean; /** * Box prompt coordinates (x_min, y_min, x_max, y_max). Multiple boxes supported - use object_id to group boxes for the same object or leave empty for separate objects. NOTE(review): element type argument is missing here; confirm the intended box prompt type. */ box_prompts?: Array; /** * URL of the image to be segmented */ image_url: string | Blob | File; /** * Whether to include bounding boxes for each mask (when available). */ include_boxes?: boolean; /** * Whether to include mask confidence scores. 
*/ include_scores?: boolean; /** * Maximum number of masks to return when `return_multiple_masks` is enabled. Default value: `3` */ max_masks?: number; /** * The format of the generated image. Default value: `"png"` */ output_format?: "jpeg" | "png" | "webp"; /** * List of point prompts. NOTE(review): element type argument is missing here; confirm the intended point prompt type. */ point_prompts?: Array; /** * Text prompt for segmentation Default value: `"wheel"` */ prompt?: string; /** * If True, upload and return multiple generated masks as defined by `max_masks`. */ return_multiple_masks?: boolean; /** * If True, the media will be returned as a data URI. */ sync_mode?: boolean; }; export type Sam3ImageOutput = { /** * Per-mask normalized bounding boxes [cx, cy, w, h] when requested. */ boxes?: Array<Array<number>>; /** * Primary segmented mask preview. */ image?: Image; /** * Segmented mask images. */ masks: Array<Image>; /** * Per-mask metadata including scores and boxes. NOTE(review): element type argument is missing here; confirm the intended metadata type. */ metadata?: Array; /** * Per-mask confidence scores when requested. */ scores?: Array<number>; }; export type SAM3RLEFileOutput = { /** * Zip file containing per-frame bounding box overlays. */ boundingbox_frames_zip?: File; /** * Per-mask normalized bounding boxes [cx, cy, w, h] when requested. */ boxes?: Array<Array<number>>; /** * Per-mask metadata when multiple RLEs are returned. NOTE(review): element type argument is missing here; confirm the intended metadata type. */ metadata?: Array; /** * Run Length Encoding of the mask. */ rle: File; /** * Per-mask confidence scores when requested. */ scores?: Array<number>; }; export type SAM3RLEOutput = { /** * Zip file containing per-frame bounding box overlays. */ boundingbox_frames_zip?: File; /** * Per-mask normalized bounding boxes [cx, cy, w, h] when requested. */ boxes?: Array<Array<number>>; /** * Per-mask metadata when multiple RLEs are returned. NOTE(review): element type argument is missing here; confirm the intended metadata type. */ metadata?: Array; /** * Run Length Encoding of the mask. */ rle: string | Array<string>; /** * Per-mask confidence scores when requested. */ scores?: Array<number>; }; export type Sam3VideoInput = { /** * Apply the mask on the video. Default value: `true` */ apply_mask?: boolean; /** * List of box prompt coordinates (x_min, y_min, x_max, y_max). 
*/ box_prompts?: Array; /** * Detection confidence threshold (0.0-1.0). Lower = more detections but less precise. Default value: `0.5` */ detection_threshold?: number; /** * List of point prompts */ point_prompts?: Array; /** * Text prompt for segmentation. Use commas to track multiple objects (e.g., 'person, cloth'). Default value: `""` */ prompt?: string; /** * The output type of the generated video. Default value: `"X264 (.mp4)"` */ video_output_type?: "X264 (.mp4)" | "VP9 (.webm)"; /** * The URL of the video to be segmented. */ video_url: string | Blob | File; }; export type Sam3VideoRleInput = { /** * Apply the mask on the video. */ apply_mask?: boolean; /** * Return per-frame bounding box overlays as a zip archive. */ boundingbox_zip?: boolean; /** * List of box prompts with optional frame_index. */ box_prompts?: Array; /** * Detection confidence threshold (0.0-1.0). Lower = more detections but less precise. Defaults: 0.5 for existing, 0.7 for new objects. Try 0.2-0.3 if text prompts fail. Default value: `0.5` */ detection_threshold?: number; /** * Frame index used for initial interaction when mask_url is provided. */ frame_index?: number; /** * The URL of the mask to be applied initially. */ mask_url?: string | Blob | File; /** * List of point prompts with frame indices. */ point_prompts?: Array; /** * Text prompt for segmentation. Use commas to track multiple objects (e.g., 'person, cloth'). Default value: `""` */ prompt?: string; /** * The URL of the video to be segmented. */ video_url: string | Blob | File; }; export type SAMAudioInput = { /** * The acceleration level to use. Default value: `"balanced"` */ acceleration?: "fast" | "balanced" | "quality"; /** * URL of the audio file to process (WAV, MP3, FLAC supported) */ audio_url: string | Blob | File; /** * Overlap duration (in seconds) between chunks for crossfade blending. Default value: `5` */ chunk_overlap?: number; /** * Maximum audio duration (in seconds) to process in a single pass. 
Longer audio will be chunked with overlap and blended. Default value: `60` */ max_chunk_duration?: number;
  /** Output audio format. Default value: `"wav"` */
  output_format?: "wav" | "mp3";
  /** Automatically predict temporal spans where the target sound occurs. */
  predict_spans?: boolean;
  /** Text prompt describing the sound to isolate. */
  prompt: string;
  /**
   * Number of candidates to generate and rank. Higher improves quality but increases latency
   * and cost. Default value: `1`
   */
  reranking_candidates?: number;
};

/** Request payload for scoring a separated audio track against its original. */
export type SAMAudioJudgeInput = {
  /** URL of the original audio file. */
  original_audio_url: string | Blob | File;
  /** Text prompt of the target sound that was separated. */
  prompt: string;
  /** URL of the separated audio file to evaluate. */
  separated_audio_url: string | Blob | File;
};

/** Quality scores returned by the audio separation judge. */
export type SAMAudioJudgeOutput = {
  /** Faithfulness score - overall separation quality (1-5 scale) */
  faithfulness: number;
  /** Overall separation quality score (1-5 scale) */
  overall: number;
  /** Precision score - how much of the output is the target sound (1-5 scale) */
  precision: number;
  /** Recall score - how much of the target sound was recovered (1-5 scale) */
  recall: number;
};

/** Result of an audio separation request. */
export type SamAudioSeparateOutput = {
  /** Duration of the output audio in seconds. */
  duration: number;
  /** Everything else in the audio. */
  residual: File;
  /** Sample rate of the output audio in Hz. Default value: `48000` */
  sample_rate?: number;
  /** The isolated target sound. */
  target: File;
};

/** Request payload for span-guided audio separation. */
export type SAMAudioSpanInput = {
  /** The acceleration level to use. Default value: `"balanced"` */
  acceleration?: "fast" | "balanced" | "quality";
  /** URL of the audio file to process. */
  audio_url: string | Blob | File;
  /** Overlap duration (in seconds) between chunks for crossfade blending. Default value: `5` */
  chunk_overlap?: number;
  /**
   * Maximum audio duration (in seconds) to process in a single pass. Longer audio will be
   * chunked with overlap and blended. Default value: `60`
   */
  max_chunk_duration?: number;
  /** Output audio format. Default value: `"wav"` */
  output_format?: "wav" | "mp3";
  /**
   * Text prompt describing the sound to isolate. Optional but recommended - helps the model
   * identify what type of sound to extract from the span.
   */
  prompt?: string;
  /**
   * Number of candidates to generate and rank. Higher improves quality but increases latency
   * and cost. Requires text prompt; ignored for span-only separation. Default value: `1`
   */
  reranking_candidates?: number;
  /**
   * Time spans where the target sound occurs which should be isolated.
   * NOTE(review): element type is missing in the original declaration; `unknown` is a
   * placeholder — confirm the concrete type.
   */
  spans: Array<unknown>;
  /**
   * Trim output audio to only include the specified span time range. If False, returns the
   * full audio length with the target sound isolated throughout.
   */
  trim_to_span?: boolean;
  /**
   * Use sound activity detection to rank reranking candidates based on how well each
   * candidate's non-silent regions match the provided spans. Enables effective reranking even
   * without a text prompt (span-only separation). Requires reranking_candidates > 1.
   */
  use_sound_activity_ranking?: boolean;
};

export type SAMAudioVisualInput = {
  /** The acceleration level to use. Default value: `"balanced"` */
  acceleration?: "fast" | "balanced" | "quality";
  /** Overlap duration (in seconds) between chunks for crossfade blending. Default value: `5` */
  chunk_overlap?: number;
  /**
   * URL of the mask video (binary mask indicating target object). Black=target,
   * White=background.
   */
  mask_video_url?: string | Blob | File;
  /**
   * Maximum audio duration (in seconds) to process in a single pass. Longer audio will be
   * chunked with overlap and blended. Default value: `60`
   */
  max_chunk_duration?: number;
  /** Output audio format. Default value: `"wav"` */
  output_format?: "wav" | "mp3";
  /**
   * Text prompt to assist with separation. Use natural language to describe the target sound.
   * Default value: `""`
   */
  prompt?: string;
  /** * Number of candidates to generate and rank.
Higher improves quality but increases latency and cost. Default value: `1` */ reranking_candidates?: number;
  /** URL of the video file to process (MP4, MOV, etc.) */
  video_url: string | Blob | File;
};

/** Request payload for the SAM image segmentation endpoint. */
export type SamInput = {
  /** Attempt better quality output using morphologyEx */
  better_quality?: boolean;
  /** Output black and white, multiple masks will be combined into one mask */
  black_white?: boolean;
  /** Coordinates for multiple boxes, e.g. [[x,y,w,h],[x2,y2,w2,h2]] */
  box_prompt?: Array<Array<number>>;
  /** Object confidence threshold. Default value: `0.4` */
  confidence?: number;
  /** Url to input image */
  image_url: string | Blob | File;
  /** Invert mask colors */
  invert?: boolean;
  /** IOU threshold for filtering the annotations. Default value: `0.9` */
  iou?: number;
  /** Label for point, [1,0], 0 = background, 1 = foreground */
  point_label?: Array<number>;
  /** Coordinates for multiple points [[x1,y1],[x2,y2]] */
  point_prompt?: Array<Array<number>>;
  /** Draw high-resolution segmentation masks. Default value: `true` */
  retina?: boolean;
  /** Image size. Default value: `1024` */
  size?: number;
  /** The prompt to use when generating masks */
  text_prompt?: string;
  /** Draw the edges of the masks */
  with_contours?: boolean;
};

/** Result of the SAM image segmentation endpoint. */
export type SamOutput = {
  /** Combined image of all detected masks */
  image?: Image;
};

export type SanaImageToVideoInput = {
  /**
   * The aspect ratio of the output video. Only used when resolution is '720p'.
   * Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "1:1";
  /** Enable safety checking of the generated video. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Frames per second for the output video. Default value: `16` */
  frames_per_second?: number;
  /** Guidance scale for generation (higher = more prompt adherence). Default value: `6` */
  guidance_scale?: number;
  /** The input image URL to animate. */
  image_url: string | Blob | File;
  /** * Motion intensity score (higher = more motion).
Default value: `30` */ motion_score?: number;
  /**
   * The negative prompt describing what to avoid in the generation.
   * Default value: `"A chaotic sequence with misshapen, deformed limbs in heavy motion blur, sudden disappearance, jump cuts, jerky movements, rapid shot changes, frames out of sync, inconsistent character shapes, temporal artifacts, jitter, and ghosting effects, creating a disorienting visual experience."`
   */
  negative_prompt?: string;
  /** Number of frames to generate. Default value: `81` */
  num_frames?: number;
  /** Number of denoising steps. Default value: `28` */
  num_inference_steps?: number;
  /** The text prompt describing how the image should be animated. */
  prompt: string;
  /** The resolution of the output video. Default value: `"480p"` */
  resolution?: "480p" | "720p";
  /**
   * Random seed for reproducible generation. If not provided, a random seed will be used.
   */
  seed?: number;
};

/** Request payload for Sana text-to-image generation. */
export type sanaInput = {
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you.
   * Default value: `5`
   */
  guidance_scale?: number;
  /** The size of the generated image. */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `18` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"jpeg"` */
  output_format?: "jpeg" | "png";
  /** The prompt to generate an image from. */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /** The style to generate the image in. Default value: `"(No style)"` */
  style_name?:
    | "(No style)"
    | "Cinematic"
    | "Photographic"
    | "Anime"
    | "Manga"
    | "Digital Art"
    | "Pixel art"
    | "Fantasy art"
    | "Neonpunk"
    | "3D Model";
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be
   * available in the request history.
   */
  sync_mode?: boolean;
};

export type SanaVideoInput = {
  /**
   * The aspect ratio of the output video. Only used when resolution is '720p'.
   * Default value: `"16:9"`
   */
  aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "1:1";
  /** Enable safety checking of the generated video. Default value: `true` */
  enable_safety_checker?: boolean;
  /** Frames per second for the output video. Default value: `16` */
  frames_per_second?: number;
  /** Guidance scale for generation (higher = more prompt adherence). Default value: `6` */
  guidance_scale?: number;
  /** Motion intensity score (higher = more motion). Default value: `30` */
  motion_score?: number;
  /**
   * The negative prompt describing what to avoid in the generation.
   * Default value: `"A chaotic sequence with misshapen, deformed limbs in heavy motion blur, sudden disappearance, jump cuts, jerky movements, rapid shot changes, frames out of sync, inconsistent character shapes, temporal artifacts, jitter, and ghosting effects, creating a disorienting visual experience."`
   */
  negative_prompt?: string;
  /** Number of frames to generate. Default value: `81` */
  num_frames?: number;
  /** Number of denoising steps. Default value: `28` */
  num_inference_steps?: number;
  /** The text prompt describing the video to generate. */
  prompt: string;
  /** * The resolution of the output video.
Default value: `"480p"` */ resolution?: "480p" | "720p";
  /**
   * Random seed for reproducible generation. If not provided, a random seed will be used.
   */
  seed?: number;
};

/** Result of a Sana video generation request. */
export type SanaVideoOutput = {
  /** The random seed used for the generation process. */
  seed: number;
  /** Performance timing breakdown. */
  timings: unknown;
  /** Generated video file. */
  video: File;
};

/** Request payload for the SCAIL video generation endpoint. */
export type scailInput = {
  /** The URL of the image to use as a reference for the video generation. */
  image_url: string | Blob | File;
  /** Enable multi-character mode. Use when driving video has multiple people. */
  multi_character?: boolean;
  /** The number of inference steps to use for the video generation. Default value: `28` */
  num_inference_steps?: number;
  /** The prompt to guide video generation. */
  prompt: string;
  /**
   * Output resolution. Outputs 896x512 (landscape) or 512x896 (portrait) based on the input
   * image aspect ratio. Default value: `"512p"`
   */
  resolution?: string;
  /** The URL of the video to use as a reference for the video generation. */
  video_url: string | Blob | File;
};

/** Request payload for scaling/resizing a video. */
export type ScaleVideoInput = {
  /**
   * Video codec to use for encoding. libx264 (H.264) is widely compatible, libx265
   * (H.265/HEVC) offers better compression. Default value: `"libx264"`
   */
  codec?: "libx264" | "libx265";
  /**
   * Constant Rate Factor for quality (0-51). Lower values mean better quality and larger
   * files. 18 is visually lossless for most content. Default value: `18`
   */
  crf?: number;
  /**
   * Target height in pixels. If only height is provided, width is auto-calculated to preserve
   * aspect ratio. At least one of width or height must be provided.
   */
  height?: number;
  /**
   * Scaling mode. 'stretch' scales the video to the exact target dimensions (may distort
   * aspect ratio). 'pad' scales to fit within the target dimensions while preserving aspect
   * ratio, then pads with the chosen color to fill the remaining space (letterbox/pillarbox).
   * 'crop' scales to cover the target dimensions while preserving aspect ratio, then
   * center-crops to the exact target size. Default value: `"stretch"`
   */
  mode?: "stretch" | "pad" | "crop";
  /** Padding color when mode is 'pad'. Ignored for other modes. Default value: `"black"` */
  pad_color?: "black" | "white" | "red" | "green" | "blue" | "gray";
  /**
   * Encoding speed preset. Slower presets give better compression but take longer.
   * Default value: `"fast"`
   */
  preset?: "ultrafast" | "fast" | "medium" | "slow";
  /**
   * URL of the video file to scale/resize. Height and Width of the video must be even numbers
   * for compatibility with video codecs.
   */
  video_url: string | Blob | File;
  /**
   * Target width in pixels. If only width is provided, height is auto-calculated to preserve
   * aspect ratio. At least one of width or height must be provided.
   */
  width?: number;
};

/** Result of a video scale/resize request. */
export type ScaleVideoOutput = {
  /** Height of the original video in pixels */
  original_height: number;
  /** Width of the original video in pixels */
  original_width: number;
  /** Height of the output video in pixels */
  scaled_height: number;
  /** Width of the output video in pixels */
  scaled_width: number;
  /** The scaled/resized video */
  video: File;
};

/** Describes one detected scene change. */
export type SceneChangeInfo = {
  /** Frame number of the scene change */
  frame_number: number;
  /** Presentation timestamp in stream time base units */
  pts: number;
  /**
   * Scene change score (0-1). Higher values indicate a bigger visual change between frames.
   */
  score: number;
  /** Timestamp of the scene change in seconds */
  timestamp: number;
};

export type SceneDetectionInput = {
  /** Whether to extract thumbnail images at scene change points. Default value: `true` */
  extract_thumbnails?: boolean;
  /** Whether to always include the first frame as a scene change. Default value: `true` */
  include_first_frame?: boolean;
  /** Maximum number of scene changes to return. Default value: `100` */
  max_scene_changes?: number;
  /** * Scene change threshold (0.0-1.0).
Lower values detect more scene changes, higher values only detect major changes. Default value: `0.1` */ threshold?: number;
  /** URL of the video file to analyze for scene changes */
  video_url: string | Blob | File;
};

/** Result of a scene detection request. */
export type SceneDetectionOutput = {
  /**
   * Thumbnail images at scene change points (populated when extract_thumbnails=True).
   * NOTE(review): element type assumed to be the file's `Image` type — confirm.
   */
  images?: Array<Image>;
  /** List of detected scene changes with timing and score information */
  scene_changes: Array<SceneChangeInfo>;
  /** Total number of scene changes detected */
  scene_count: number;
};

/** Request payload for Schnell Redux image-to-image generation. */
export type SchnellReduxInput = {
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** The size of the generated image. Default value: `landscape_4_3` */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /** The URL of the image to generate an image from. */
  image_url: string | Blob | File;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `4` */
  num_inference_steps?: number;
  /** The format of the generated image. Default value: `"png"` */
  output_format?: "jpeg" | "png";
  /**
   * The same seed and the same prompt given to the same version of the model
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be
   * available in the request history.
   */
  sync_mode?: boolean;
};

/** Request payload for the Scribble detector. */
export type ScribbleInput = {
  /** URL of the image to process */
  image_url: string | Blob | File;
  /** The model to use for the Scribble detector. Default value: `"HED"` */
  model?: "HED" | "PiDi";
  /** Whether to use the safe version of the Scribble detector */
  safe?: boolean;
};

export type Sd15DepthControlnetInput = {
  /** The URL of the control image. */
  control_image_url: string | Blob | File;
  /** * The scale of the controlnet conditioning.
Default value: `0.5` */ controlnet_conditioning_scale?: number;
  /** If set to true, DeepCache will be enabled. TBD */
  enable_deep_cache?: boolean;
  /** If set to true, the safety checker will be enabled. */
  enable_safety_checker?: boolean;
  /** If set to true, the prompt will be expanded with additional prompts. */
  expand_prompt?: boolean;
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you.
   * Default value: `7.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. Leave it none to automatically infer from the control
   * image.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The list of LoRA weights to use.
   * NOTE(review): element type is missing in the original declaration; `unknown` is a
   * placeholder — confirm the concrete type.
   */
  loras?: Array<unknown>;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `35` */
  num_inference_steps?: number;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best
   * results.
   */
  prompt: string;
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /**
   * If set to true, the function will wait for the image to be generated and uploaded
   * before returning the response. This will increase the latency of the function but
   * it allows you to get the image directly in the response without going through the CDN.
   */
  sync_mode?: boolean;
};

/** Request payload for SDXL generation with the ControlNet Union model. */
export type SdxlControlnetUnionInput = {
  /** The URL of the control image. */
  canny_image_url?: string | Blob | File;
  /** Whether to preprocess the canny image. Default value: `true` */
  canny_preprocess?: boolean;
  /** The scale of the controlnet conditioning. Default value: `0.5` */
  controlnet_conditioning_scale?: number;
  /** The URL of the control image. */
  depth_image_url?: string | Blob | File;
  /** Whether to preprocess the depth image. Default value: `true` */
  depth_preprocess?: boolean;
  /**
   * The list of embeddings to use.
   * NOTE(review): element type is missing in the original declaration; `unknown` is a
   * placeholder — confirm the concrete type.
   */
  embeddings?: Array<unknown>;
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** If set to true, the prompt will be expanded with additional prompts. */
  expand_prompt?: boolean;
  /** The format of the generated image. Default value: `"jpeg"` */
  format?: "jpeg" | "png";
  /**
   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
   * the model to stick to your prompt when looking for a related image to show you.
   * Default value: `7.5`
   */
  guidance_scale?: number;
  /**
   * The size of the generated image. Leave it none to automatically infer from the control
   * image.
   */
  image_size?:
    | ImageSize
    | "square_hd"
    | "square"
    | "portrait_4_3"
    | "portrait_16_9"
    | "landscape_4_3"
    | "landscape_16_9";
  /**
   * The list of LoRA weights to use.
   * NOTE(review): element type is missing in the original declaration; `unknown` is a
   * placeholder — confirm the concrete type.
   */
  loras?: Array<unknown>;
  /**
   * The negative prompt to use. Use it to address details that you don't want
   * in the image. This could be colors, objects, scenery and even the small details
   * (e.g. moustache, blurry, low resolution). Default value: `""`
   */
  negative_prompt?: string;
  /** The URL of the control image. */
  normal_image_url?: string | Blob | File;
  /** Whether to preprocess the normal image. Default value: `true` */
  normal_preprocess?: boolean;
  /** The number of images to generate. Default value: `1` */
  num_images?: number;
  /** The number of inference steps to perform. Default value: `35` */
  num_inference_steps?: number;
  /** The URL of the control image. */
  openpose_image_url?: string | Blob | File;
  /** Whether to preprocess the openpose image. Default value: `true` */
  openpose_preprocess?: boolean;
  /**
   * The prompt to use for generating the image. Be as descriptive as possible for best
   * results.
   */
  prompt: string;
  /**
   * An id bound to a request, can be used with response to identify the request
   * itself. Default value: `""`
   */
  request_id?: string;
  /**
   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2
   * uses a custom ViT model. Default value: `"v1"`
   */
  safety_checker_version?: "v1" | "v2";
  /**
   * The same seed and the same prompt given to the same version of Stable Diffusion
   * will output the same image every time.
   */
  seed?: number;
  /** The URL of the control image. */
  segmentation_image_url?: string | Blob | File;
  /** Whether to preprocess the segmentation image. Default value: `true` */
  segmentation_preprocess?: boolean;
  /**
   * If `True`, the media will be returned as a data URI and the output data won't be
   * available in the request history.
   */
  sync_mode?: boolean;
  /** The URL of the control image. */
  teed_image_url?: string | Blob | File;
  /** Whether to preprocess the teed image. Default value: `true` */
  teed_preprocess?: boolean;
};

/** Request payload for the Seed 2 Mini multimodal chat model. */
export type Seed2MiniInput = {
  /**
   * URLs of images for visual understanding. Supported formats: JPEG, PNG, WebP. A maximum of
   * 6 images is supported. Any additional images will be ignored.
   */
  image_urls?: Array<string>;
  /**
   * Controls the maximum length of the model's output, including both the model's response
   * and its chain-of-thought content, measured in tokens. Default value: `4096`
   */
  max_completion_tokens?: number;
  /**
   * Optional prior conversation history for multi-turn conversations. Pass back the
   * `messages` field from a previous response to provide context. The current `prompt`,
   * `image_urls`, `video_urls`, and `system_prompt` are always appended as the latest user
   * turn.
   */
  messages?: Array<Seed2MiniMessage>;
  /** The text prompt or question for the model. */
  prompt: string;
  /**
   * Controls the depth of reasoning before the model responds. Only applicable when
   * `thinking` is `enabled` or `auto`. `minimal` for immediate response, `low` for faster
   * response with light reasoning, `medium` for balanced speed and depth, `high` for deep
   * analysis of complex issues.
   */
  reasoning_effort?: "minimal" | "low" | "medium" | "high";
  /** Optional system prompt to guide the model's behavior. */
  system_prompt?: string;
  /**
   * Controls randomness in the response. Lower values make output more focused and
   * deterministic, higher values make it more creative. Default value: `1`
   */
  temperature?: number;
  /**
   * Controls the model's chain-of-thought reasoning. `enabled` always includes reasoning,
   * `disabled` never includes reasoning, `auto` lets the model decide based on the query.
   * Default value: `"enabled"`
   */
  thinking?: "enabled" | "disabled" | "auto";
  /**
   * Nucleus sampling parameter. The model considers tokens with top_p cumulative probability
   * mass. Lower values narrow the token selection. Default value: `0.7`
   */
  top_p?: number;
  /**
   * URLs of videos for video understanding. Supported formats: MP4, MOV. Audio comprehension
   * is not supported. A maximum of 3 videos is supported. Any additional videos will be
   * ignored.
   */
  video_urls?: Array<string>;
};

/** One message in a Seed 2 Mini conversation. */
export type Seed2MiniMessage = {
  /**
   * The content of the message. Can be a string for text-only messages, or a list of content
   * parts for multimodal messages (e.g. with images).
   * NOTE(review): the content-part type is missing in the original declaration; `unknown` is
   * a placeholder — confirm the concrete type.
   */
  content: string | Array<unknown>;
  /** The role of the message author. */
  role: "system" | "user" | "assistant";
};

export type Seed2MiniOutput = {
  /**
   * The full conversation history including the model's response. Pass this back as the
   * `messages` input field to continue the conversation.
   */
  messages: Array<Seed2MiniMessage>;
  /** The model's text response. */
  output: string;
  /** * The model's chain-of-thought reasoning content. Only present when `thinking` is `enabled` or `auto`.
*/ reasoning_content?: string;
};

/** Result of a Seed3D image-to-3D request. */
export type Seed3DImageTo3DOutput = {
  /** The generated 3D model files */
  model: File;
  /** The number of tokens used for the 3D model generation */
  usage_tokens: number;
};

/** Request payload for Seedance 2 image-to-video generation. */
export type Seedance2I2VInput = {
  /**
   * The aspect ratio of the generated video. Use 16:9 for landscape, 9:16 for
   * portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to infer from
   * the input image. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16";
  /**
   * Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model
   * decide based on the prompt. Default value: `"auto"`
   */
  duration?:
    | "auto"
    | "4"
    | "5"
    | "6"
    | "7"
    | "8"
    | "9"
    | "10"
    | "11"
    | "12"
    | "13"
    | "14"
    | "15";
  /**
   * The URL of the image to use as the last frame of the video. When provided, the generated
   * video will transition from the starting image to this ending image. Supported formats:
   * JPEG, PNG, WebP. Max 30 MB.
   */
  end_image_url?: string | Blob | File;
  /**
   * Whether to generate synchronized audio for the video, including sound effects, ambient
   * sounds, and lip-synced speech. When enabled, the cost of the request is doubled.
   * Default value: `true`
   */
  generate_audio?: boolean;
  /**
   * The URL of the starting frame image to animate. Supported formats: JPEG, PNG, WebP.
   * Max 30 MB.
   */
  image_url: string | Blob | File;
  /** The text prompt describing the desired motion and action for the video. */
  prompt: string;
  /**
   * Video resolution - 480p for faster generation, 720p for balance.
   * Default value: `"720p"`
   */
  resolution?: "480p" | "720p";
  /**
   * Random seed for reproducibility. Note that results may still vary slightly even with the
   * same seed.
   */
  seed?: number;
};

/** Request payload for Seedance 2 reference-to-video generation. */
export type Seedance2R2VInput = {
  /**
   * The aspect ratio of the generated video. Use 16:9 for landscape, 9:16 for
   * portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to let the model
   * decide. Default value: `"auto"`
   */
  aspect_ratio?: "auto" | "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16";
  /**
   * Reference audio to guide video generation. Refer to them in the prompt as @Audio1,
   * @Audio2, etc. Supported formats: MP3, WAV. Up to 3 files, combined duration must not
   * exceed 15 seconds. Max 15 MB per file.
   */
  audio_urls?: Array<string>;
  /**
   * Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model
   * decide based on the prompt. Default value: `"auto"`
   */
  duration?:
    | "auto"
    | "4"
    | "5"
    | "6"
    | "7"
    | "8"
    | "9"
    | "10"
    | "11"
    | "12"
    | "13"
    | "14"
    | "15";
  /**
   * Whether to generate synchronized audio for the video, including sound effects, ambient
   * sounds, and lip-synced speech. When enabled, the cost of the request is doubled.
   * Default value: `true`
   */
  generate_audio?: boolean;
  /**
   * Reference images to guide video generation. Refer to them in the prompt as @Image1,
   * @Image2, etc. Supported formats: JPEG, PNG, WebP. Max 30 MB per image. Up to 9 images.
   * Total files across all modalities must not exceed 12.
   */
  image_urls?: Array<string>;
  /**
   * The text prompt used to generate the video. Use @Image1, @Video1, @Audio1 etc. in the
   * prompt to refer to the corresponding reference media.
   */
  prompt: string;
  /**
   * Video resolution - 480p for faster generation, 720p for balance.
   * Default value: `"720p"`
   */
  resolution?: "480p" | "720p";
  /**
   * Random seed for reproducibility. Note that results may still vary slightly even with the
   * same seed.
   */
  seed?: number;
  /**
   * Reference videos to guide video generation. Refer to them in the prompt as @Video1,
   * @Video2, etc. Supported formats: MP4, MOV. Up to 3 videos, combined duration must be
   * between 2 and 15 seconds, total size under 50 MB. Each video must be between ~480p
   * (640x640) and ~720p (834x1112) in resolution.
   */
  video_urls?: Array<string>;
};

export type Seedance2T2VInput = {
  /** * The aspect ratio of the generated video.
Use 16:9 for landscape, 9:16 for portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to let the model decide. Default value: `"auto"` */ aspect_ratio?: "auto" | "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16";
  /**
   * Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model
   * decide based on the prompt. Default value: `"auto"`
   */
  duration?:
    | "auto"
    | "4"
    | "5"
    | "6"
    | "7"
    | "8"
    | "9"
    | "10"
    | "11"
    | "12"
    | "13"
    | "14"
    | "15";
  /**
   * Whether to generate synchronized audio for the video, including sound effects, ambient
   * sounds, and lip-synced speech. When enabled, the cost of the request is doubled.
   * Default value: `true`
   */
  generate_audio?: boolean;
  /** The text prompt used to generate the video */
  prompt: string;
  /**
   * Video resolution - 480p for faster generation, 720p for balance.
   * Default value: `"720p"`
   */
  resolution?: "480p" | "720p";
  /**
   * Random seed for reproducibility. Note that results may still vary slightly even with the
   * same seed.
   */
  seed?: number;
};

/** Request payload for Seedance image-to-video generation. */
export type SeedanceImageToVideoInput = {
  /** The aspect ratio of the generated video. Default value: `"auto"` */
  aspect_ratio?: "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "auto";
  /** Whether to fix the camera position */
  camera_fixed?: boolean;
  /** Duration of the video in seconds. Default value: `"5"` */
  duration?: "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** The URL of the image the video ends with. Defaults to None. */
  end_image_url?: string | Blob | File;
  /** The URL of the image used to generate video */
  image_url: string | Blob | File;
  /** The number of frames to generate. If provided, will override duration. */
  num_frames?: number;
  /** The text prompt used to generate the video */
  prompt: string;
  /**
   * Video resolution - 480p for faster generation, 720p for higher quality.
   * Default value: `"720p"`
   */
  resolution?: "480p" | "720p" | "1080p";
  /** Random seed to control video generation. Use -1 for random. */
  seed?: number;
};

/** Request payload for Seedance Pro Fast image-to-video generation. */
export type SeedanceProFastImageToVideoInput = {
  /** The aspect ratio of the generated video. Default value: `"auto"` */
  aspect_ratio?: "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "auto";
  /** Whether to fix the camera position */
  camera_fixed?: boolean;
  /** Duration of the video in seconds. Default value: `"5"` */
  duration?: "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** The URL of the image used to generate video */
  image_url: string | Blob | File;
  /** The number of frames to generate. If provided, will override duration. */
  num_frames?: number;
  /** The text prompt used to generate the video */
  prompt: string;
  /**
   * Video resolution - 480p for faster generation, 720p for balance, 1080p for higher
   * quality. Default value: `"1080p"`
   */
  resolution?: "480p" | "720p" | "1080p";
  /** Random seed to control video generation. Use -1 for random. */
  seed?: number;
};

export type SeedanceProv15ImageToVideoInput = {
  /** The aspect ratio of the generated video. Default value: `"16:9"` */
  aspect_ratio?: "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "auto";
  /** Whether to fix the camera position */
  camera_fixed?: boolean;
  /** Duration of the video in seconds. Default value: `"5"` */
  duration?: "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12";
  /** If set to true, the safety checker will be enabled. Default value: `true` */
  enable_safety_checker?: boolean;
  /** * The URL of the image the video ends with. Defaults to None.
*/ end_image_url?: string | Blob | File; /** * Whether to generate audio for the video Default value: `true` */ generate_audio?: boolean; /** * The URL of the image used to generate video */ image_url: string | Blob | File; /** * The text prompt used to generate the video */ prompt: string; /** * Video resolution - 480p for faster generation, 720p for balance, 1080p for higher quality Default value: `"720p"` */ resolution?: "480p" | "720p" | "1080p"; /** * Random seed to control video generation. Use -1 for random. */ seed?: number; }; export type SeedanceProv15TextToVideoInput = { /** * The aspect ratio of the generated video Default value: `"16:9"` */ aspect_ratio?: "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "auto"; /** * Whether to fix the camera position */ camera_fixed?: boolean; /** * Duration of the video in seconds Default value: `"5"` */ duration?: "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Whether to generate audio for the video Default value: `true` */ generate_audio?: boolean; /** * The text prompt used to generate the video */ prompt: string; /** * Video resolution - 480p for faster generation, 720p for balance, 1080p for higher quality Default value: `"720p"` */ resolution?: "480p" | "720p" | "1080p"; /** * Random seed to control video generation. Use -1 for random. */ seed?: number; }; export type SeedanceReferenceToVideoInput = { /** * The aspect ratio of the generated video Default value: `"auto"` */ aspect_ratio?: "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "auto"; /** * Whether to fix the camera position */ camera_fixed?: boolean; /** * Duration of the video in seconds Default value: `"5"` */ duration?: "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12"; /** * If set to true, the safety checker will be enabled. 
Default value: `true` */ enable_safety_checker?: boolean; /** * The number of frames to generate. If provided, will override duration. */ num_frames?: number; /** * The text prompt used to generate the video. */ prompt: string; /** * Reference images to generate the video with. Each entry is a URL string, `Blob`, or `File`, the same union used by the single-image `image_url` fields. */ reference_image_urls: Array<string | Blob | File>; /** * Video resolution - 480p for faster generation, 720p for higher quality. Default value: `"720p"` */ resolution?: "480p" | "720p"; /** * Random seed to control video generation. Use -1 for random. */ seed?: number; }; /** * Input fields for text-to-video generation. Only `prompt` is required. */ export type SeedanceTextToVideoInput = { /** * The aspect ratio of the generated video. Default value: `"16:9"` */ aspect_ratio?: "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "9:21"; /** * Whether to fix the camera position. */ camera_fixed?: boolean; /** * Duration of the video in seconds, given as a string literal. Default value: `"5"` */ duration?: "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "10" | "11" | "12"; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The number of frames to generate. If provided, will override duration. */ num_frames?: number; /** * The text prompt used to generate the video. */ prompt: string; /** * Video resolution - 480p for faster generation, 720p for higher quality; `"1080p"` is also accepted by the type. Default value: `"720p"` */ resolution?: "480p" | "720p" | "1080p"; /** * Random seed to control video generation. Use -1 for random. */ seed?: number; }; /** * Input fields for prompt-based editing of one or more input images. `image_urls` and `prompt` are required; all other fields are optional. */ export type SeedDream45EditInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. Width and height must be between 1920 and 4096, or total number of pixels must be between 2560*1440 and 4096*4096. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9" | "auto_2K" | "auto_4K"; /** * List of URLs of input images for editing. Presently, up to 10 image inputs are allowed. 
If over 10 images are sent, only the last 10 will be used. Each entry is a URL string, `Blob`, or `File`. */ image_urls: Array<string | Blob | File>; /** * If set to a number greater than one, enables multi-image generation. The model will potentially return up to `max_images` images every generation, and in total, `num_images` generations will be carried out. In total, the number of images generated will be between `num_images` and `max_images*num_images`. The total number of images (image inputs + image outputs) must not exceed 15. Default value: `1` */ max_images?: number; /** * Number of separate model generations to be run with the prompt. Default value: `1` */ num_images?: number; /** * The text prompt used to edit the image. */ prompt: string; /** * Random seed to control the stochasticity of image generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input fields for text-to-image generation. Only `prompt` is required. */ export type SeedDream45T2IInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. Width and height must be between 1920 and 4096, or total number of pixels must be between 2560*1440 and 4096*4096. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9" | "auto_2K" | "auto_4K"; /** * If set to a number greater than one, enables multi-image generation. The model will potentially return up to `max_images` images every generation, and in total, `num_images` generations will be carried out. In total, the number of images generated will be between `num_images` and `max_images*num_images`. Default value: `1` */ max_images?: number; /** * Number of separate model generations to be run with the prompt. Default value: `1` */ num_images?: number; /** * The text prompt used to generate the image. */ prompt: string; /** * Random seed to control the stochasticity of image generation. 
*/ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input fields for prompt-based editing of one or more input images. `image_urls` and `prompt` are required; all other fields are optional. */ export type SeedDream4EditInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The mode to use for prompt enhancement. Standard mode provides higher quality results but takes longer to generate. Fast mode provides average quality results but takes less time to generate. Default value: `"standard"` */ enhance_prompt_mode?: "standard" | "fast"; /** * The size of the generated image. The minimum total image area is 921600 pixels. Failing this, the image size will be adjusted by scaling it up, while maintaining the aspect ratio. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9" | "auto" | "auto_2K" | "auto_4K"; /** * List of URLs of input images for editing. Presently, up to 10 image inputs are allowed. If over 10 images are sent, only the last 10 will be used. Each entry is a URL string, `Blob`, or `File`. */ image_urls: Array<string | Blob | File>; /** * If set to a number greater than one, enables multi-image generation. The model will potentially return up to `max_images` images every generation, and in total, `num_images` generations will be carried out. In total, the number of images generated will be between `num_images` and `max_images*num_images`. The total number of images (image inputs + image outputs) must not exceed 15. Default value: `1` */ max_images?: number; /** * Number of separate model generations to be run with the prompt. Default value: `1` */ num_images?: number; /** * The text prompt used to edit the image. */ prompt: string; /** * Random seed to control the stochasticity of image generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; }; /** * Input fields for text-to-image generation. Only `prompt` is required. */ export type SeedDream4T2IInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The mode to use for prompt enhancement. Standard mode provides higher quality results but takes longer to generate. Fast mode provides average quality results but takes less time to generate. Default value: `"standard"` */ enhance_prompt_mode?: "standard" | "fast"; /** * The size of the generated image. Total pixels must be between 960x960 and 4096x4096. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9" | "auto" | "auto_2K" | "auto_4K"; /** * If set to a number greater than one, enables multi-image generation. The model will potentially return up to `max_images` images every generation, and in total, `num_images` generations will be carried out. In total, the number of images generated will be between `num_images` and `max_images*num_images`. Default value: `1` */ max_images?: number; /** * Number of separate model generations to be run with the prompt. Default value: `1` */ num_images?: number; /** * The text prompt used to generate the image. */ prompt: string; /** * Random seed to control the stochasticity of image generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input fields for prompt-based editing of one or more input images. `image_urls` and `prompt` are required; all other fields are optional. */ export type SeedDream50LiteEditInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. Total pixels must be between 2560x1440 and 3072x3072. In case the image size does not fall within these parameters, the image size will be adjusted by scaling. 
Default value: `"auto_2K"` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9" | "auto_2K" | "auto_3K"; /** * List of URLs of input images for editing. Presently, up to 10 image inputs are allowed. If over 10 images are sent, only the last 10 will be used. Each entry is a URL string, `Blob`, or `File`. */ image_urls: Array<string | Blob | File>; /** * If set to a number greater than one, enables multi-image generation. The model will potentially return up to `max_images` images every generation, and in total, `num_images` generations will be carried out. In total, the number of images generated will be between `num_images` and `max_images*num_images`. Default value: `1` */ max_images?: number; /** * Number of separate model generations to be run with the prompt. Default value: `1` */ num_images?: number; /** * The text prompt used to edit the image. */ prompt: string; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input fields for text-to-image generation. Only `prompt` is required. */ export type SeedDream50LiteT2IInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The size of the generated image. Total pixels must be between 2560x1440 and 3072x3072. In case the image size does not fall within these parameters, the image size will be adjusted by scaling. Default value: `"auto_2K"` */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9" | "auto_2K" | "auto_3K"; /** * If set to a number greater than one, enables multi-image generation. The model will potentially return up to `max_images` images every generation, and in total, `num_images` generations will be carried out. In total, the number of images generated will be between `num_images` and `max_images*num_images`. Default value: `1` */ max_images?: number; /** * Number of separate model generations to be run with the prompt. 
Default value: `1` */ num_images?: number; /** * The text prompt used to generate the image. */ prompt: string; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input fields for text-to-image generation. Only `prompt` is required. */ export type SeedDreamInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Controls how closely the output image aligns with the input prompt. Higher values mean stronger prompt correlation. Default value: `2.5` */ guidance_scale?: number; /** * Use for finer control over the output image size. Will be used over aspect_ratio, if both are provided. Width and height must be between 512 and 2048. NOTE(review): this type declares no `aspect_ratio` field; confirm whether the reference to it is still accurate. */ image_size?: ImageSize | "square_hd" | "square" | "portrait_4_3" | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; /** * Number of images to generate. Default value: `1` */ num_images?: number; /** * The text prompt used to generate the image. */ prompt: string; /** * Random seed to control the stochasticity of image generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; }; /** * Input fields for prompt-based editing of a single image. `image_url` and `prompt` are required; all other fields are optional. */ export type SeedEditInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * Controls how closely the output image aligns with the input prompt. Higher values mean stronger prompt correlation. Default value: `0.5` */ guidance_scale?: number; /** * URL of the image to be edited. */ image_url: string | Blob | File; /** * The text prompt used to edit the image. */ prompt: string; /** * Random seed to control the stochasticity of image generation. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; }; /** * Input fields for image upscaling. Only `image_url` is required. */ export type SeedVRImageInput = { /** * The input image to be processed. */ image_url: string | Blob | File; /** * The noise scale to use for the generation process. Default value: `0.1` */ noise_scale?: number; /** * The format of the output image. Default value: `"jpg"` */ output_format?: "png" | "jpg" | "webp"; /** * The random seed used for the generation process. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The target resolution to upscale to when `upscale_mode` is `target`. Default value: `"1080p"` */ target_resolution?: "720p" | "1080p" | "1440p" | "2160p"; /** * Upscaling factor to be used. Will multiply the dimensions with this factor when `upscale_mode` is `factor`. Default value: `2` */ upscale_factor?: number; /** * The mode to use for the upscale. If 'target', the upscale factor will be calculated based on the target resolution. If 'factor', the upscale factor will be used directly. Default value: `"factor"` */ upscale_mode?: "target" | "factor"; }; /** * Result of image upscaling: the upscaled image and the seed that was used. Both fields are always present. */ export type SeedVRImageOutput = { /** * Upscaled image file after processing. */ image: ImageFile; /** * The random seed used for the generation process. */ seed: number; }; /** * Input fields for image upscaling with an optional safety checker. Only `image_url` is required. */ export type SeedVRSeamlessImageInput = { /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** * The input image to be processed. */ image_url: string | Blob | File; /** * The noise scale to use for the generation process. Default value: `0.1` */ noise_scale?: number; /** * The format of the output image. Default value: `"png"` */ output_format?: "png" | "jpeg" | "webp"; /** * The random seed used for the generation process. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. 
*/ sync_mode?: boolean; /** * The target resolution to upscale to when `upscale_mode` is `target`. Default value: `"1080p"` */ target_resolution?: "720p" | "1080p" | "1440p" | "2160p"; /** * Upscaling factor to be used. Will multiply the dimensions with this factor when `upscale_mode` is `factor`. Default value: `2` */ upscale_factor?: number; /** * The mode to use for the upscale. If 'target', the upscale factor will be calculated based on the target resolution. If 'factor', the upscale factor will be used directly. Default value: `"factor"` */ upscale_mode?: "target" | "factor"; }; /** * Input fields for video upscaling. Only `video_url` is required. */ export type SeedVRVideoInput = { /** * The noise scale to use for the generation process. Default value: `0.1` */ noise_scale?: number; /** * The format of the output video. Default value: `"X264 (.mp4)"` */ output_format?: "X264 (.mp4)" | "VP9 (.webm)" | "PRORES4444 (.mov)" | "GIF (.gif)"; /** * The quality of the output video. Default value: `"high"` */ output_quality?: "low" | "medium" | "high" | "maximum"; /** * The write mode of the output video. Default value: `"balanced"` */ output_write_mode?: "fast" | "balanced" | "small"; /** * The random seed used for the generation process. */ seed?: number; /** * If `true`, the media will be returned as a data URI and the output data won't be available in the request history. */ sync_mode?: boolean; /** * The target resolution to upscale to when `upscale_mode` is `target`. Default value: `"1080p"` */ target_resolution?: "720p" | "1080p" | "1440p" | "2160p"; /** * Upscaling factor to be used. Will multiply the dimensions with this factor when `upscale_mode` is `factor`. Default value: `2` */ upscale_factor?: number; /** * The mode to use for the upscale. If 'target', the upscale factor will be calculated based on the target resolution. If 'factor', the upscale factor will be used directly. 
Default value: `"factor"` */ upscale_mode?: "target" | "factor"; /** * The input video to be processed. */ video_url: string | Blob | File; }; /** * Optional sampling settings: token limit, temperature, and nucleus-sampling probability mass. */ export type SegmentSamplingSettings = { /** * Maximum number of tokens to generate. */ max_tokens?: number; /** * Sampling temperature to use. Higher values will make the output more random, while lower values will make it more focused and deterministic. Default value: `1` */ temperature?: number; /** * Nucleus sampling probability mass to use, between 0 and 1. Default value: `1` */ top_p?: number; }; /** * A hypothesis image paired with a text reference for a measurement. Both fields are required strings. */ export type SemanticImageInput = { /** * The hypothesis image to use for the measurement. */ hypothesis: string; /** * The text reference to use for the measurement. */ reference: string; }; /** * Input fields for changing the playback speed of a video. Only `video_url` is required. */ export type SetptsVideoInput = { /** * Speed multiplier (0.25-4.0). Values > 1.0 speed up, < 1.0 slow down. E.g., 2.0 = 2x faster, 0.5 = half speed. Default value: `4` */ speed_factor?: number; /** * URL of the video file whose speed will be changed. */ video_url: string | Blob | File; }; /** * Input fields for generating sound-effect audio for a video. Only `video_url` is required. */ export type SfxV15VideoToAudioInput = { /** * The duration of the generated audio in seconds. Default value: `10` */ duration?: number; /** * The number of samples to generate from the model. Default value: `2` */ num_samples?: number; /** * The seed to use for the generation. 
If not provided, a random seed will be used. Default value: `8069` */ seed?: number; /** * The start offset in seconds to start the audio generation from. */ start_offset?: number; /** * Additional description to guide the model. */ text_prompt?: string; /** * A video URL that can be accessed from the API to process and add sound effects. */ video_url: string | Blob | File; }; /** * Output holding the generated sound-effect audio. */ export type SfxV15VideoToAudioOutput = { /** * The generated sound effects audio. NOTE(review): `Array` is written without a type argument here; confirm the intended element type. */ audio: Array; }; /** * Output holding the processed video with sound effects. */ export type SfxV15VideoToVideoOutput = { /** * The processed video with sound effects. NOTE(review): `Array` is written without a type argument here; confirm the intended element type. */ video: Array; }; /** * Input fields for generating sound-effect audio for a video. Only `video_url` is required. */ export type SfxV1VideoToAudioInput = { /** * The duration of the generated audio in seconds. Default value: `10` */ duration?: number; /** * The number of samples to generate from the model. Default value: `2` */ num_samples?: number; /** * The seed to use for the generation. If not provided, a random seed will be used. Default value: `2105` */ seed?: number; /** * Additional description to guide the model. */ text_prompt?: string; /** * A video URL that can be accessed from the API to process and add sound effects. */ video_url: string | Blob | File; }; export type SfxV1VideoToAudioOutput = { /** * The generated sound effects audio */ audio: Array