"""Generate videos using Google Veo via the Gemini API.
Supports the full Veo feature surface:
- Text-to-video generation with audio (Veo 3.x natively includes audio)
- Image-to-video: animate a starting frame
- First + last frame interpolation: control both the opening and closing shot
- Reference image guided generation: up to 3 asset images to preserve appearance
- Video extension: extend a previously Veo-generated clip by ~7 seconds
- Multiple model variants: Veo 3.1, 3.1 Fast, 3.1 Lite, Veo 3, Veo 3 Fast, Veo 2
- Resolution: 720p, 1080p, 4k (model-dependent)
- Aspect ratio: 16:9 (landscape) or 9:16 (portrait)
- Duration: 4 / 6 / 8 seconds (model-dependent)
- Negative prompts, person generation controls, seed
Video generation is a long-running API operation (30 s – 6 min). This tool
polls until the result is ready, then downloads the MP4 and uploads it to
the current channel.
"""
from __future__ import annotations
import asyncio
import base64
import hashlib
import jsonutil as json
import logging
from typing import Any, TYPE_CHECKING
if TYPE_CHECKING:
from tool_context import ToolContext
logger = logging.getLogger(__name__)
# Base URL of the Gemini REST API (v1beta). The generation endpoint and the
# operation poll URL are both built by appending to this prefix.
GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta"
# Model identifiers
VEO_3_1 = "veo-3.1-generate-preview"
VEO_3_1_FAST = "veo-3.1-fast-generate-preview"
VEO_3_1_LITE = "veo-3.1-lite-generate-preview"
VEO_3 = "veo-3.0-generate-001"
VEO_3_FAST = "veo-3.0-fast-generate-001"
VEO_2 = "veo-2.0-generate-001"
# Model used when the caller passes none, or passes a value outside VALID_MODELS.
DEFAULT_MODEL = VEO_3_1
VALID_MODELS = {VEO_3_1, VEO_3_1_FAST, VEO_3_1_LITE, VEO_3, VEO_3_FAST, VEO_2}
# Feature support sets (per docs)
_EXTENSION_MODELS = {VEO_3_1, VEO_3_1_FAST}
_REFERENCE_IMAGE_MODELS = {VEO_3_1, VEO_3_1_FAST}
# NOTE(review): the tool schema describes 'veo-3.0-generate-001' as "up to
# 1080p", yet the Veo 3 models are listed here as 4k-capable -- confirm which
# of the two is correct.
_4K_MODELS = {VEO_3_1, VEO_3_1_FAST, VEO_3, VEO_3_FAST}
# NOTE(review): this set has the same members as VALID_MODELS, so the
# last-frame model check cannot reject any model that run() has normalised.
# TOOL_DESCRIPTION advertises interpolation for Veo 3.1 only -- confirm intent.
_LAST_FRAME_MODELS = {VEO_3_1, VEO_3_1_FAST, VEO_3_1_LITE, VEO_3, VEO_3_FAST, VEO_2}
# Shared key returned by _resolve_api_key when no per-user key is found.
# NOTE(review): this is a credential committed in source. It should presumably
# be loaded from configuration / a secret store and the committed value
# rotated -- confirm with the owners before changing, since removing the
# literal alters the fallback behaviour.
FALLBACK_API_KEY = "AIzaSyCCwz9WCsIKSWsfufU6E-JbPsP1acLhZTU"
_VIDEO_DAILY_LIMIT = 1 # Very expensive; one free generation per day per user
# Generation can take up to ~6 min at peak; allow a generous 12-min ceiling
_MAX_POLL_SECONDS = 720
# Pause between successive status polls of the long-running operation.
_POLL_INTERVAL_SECONDS = 10
# Tool identifier; also passed as the tool name to the shared-key usage helpers.
TOOL_NAME = "generate_veo_video"
# One-paragraph summary of the tool. The adjacent string literals below are
# concatenated by Python into a single string.
# NOTE(review): the "1 generation per day" wording duplicates
# _VIDEO_DAILY_LIMIT; the two must be kept in sync by hand.
# NOTE(review): this text ties first+last frame interpolation to Veo 3.1,
# whereas _LAST_FRAME_MODELS lists every model -- confirm which is intended.
TOOL_DESCRIPTION = (
"Generate high-quality videos (with natively generated audio) using "
"Google Veo 3.1 via the Gemini API. Supports text-to-video, "
"image-to-video, first+last frame interpolation (Veo 3.1), up to 3 "
"reference images for subject preservation (Veo 3.1), and video "
"extension of previously Veo-generated clips (Veo 3.1). Multiple model "
"variants available (3.1, 3.1 Fast, 3.1 Lite, 3, 3 Fast, 2). Outputs "
"720p/1080p/4k MP4 at 24fps. Generation takes 30 seconds to ~6 minutes. "
"LIMIT: 1 generation per day on the shared key. Users can set their own "
"Gemini key via set_user_api_key for unlimited use."
)
# JSON-Schema-style description of the arguments accepted by run(). The
# property names mirror run()'s keyword parameters (everything except ctx),
# and only "prompt" is required.
# No enum constraints are declared here: run() replaces an unrecognised model,
# aspect_ratio, or resolution with its default instead of rejecting it.
TOOL_PARAMETERS = {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": (
"Video description. For best results include:\n"
"- Subject: main focus (person, animal, object, scenery)\n"
"- Action: what the subject is doing\n"
"- Style: film style keywords (cinematic, anime, film noir, "
"documentary, 3D cartoon, surreal)\n"
"- Camera: aerial view, dolly shot, POV, tracking shot, "
"close-up, wide shot, low angle, eye-level\n"
"- Ambiance: lighting and color (warm tones, blue tones, "
"golden hour, neon glow)\n"
"- Audio: dialogue in quotes, explicit SFX descriptions, "
"ambient soundscape details\n"
"- Negative elements: describe what to EXCLUDE without "
"using 'no'/'don't' (e.g. 'urban background, text overlays')"
),
},
# The identifiers listed here must match the VEO_* constants / VALID_MODELS.
"model": {
"type": "string",
"description": (
"Veo model variant:\n"
"- 'veo-3.1-generate-preview' (default): best quality, "
"full features (extension, reference images, 4k)\n"
"- 'veo-3.1-fast-generate-preview': same features, faster\n"
"- 'veo-3.1-lite-generate-preview': cost-effective, "
"text/image-to-video, 720p/1080p, no extension\n"
"- 'veo-3.0-generate-001': Veo 3 stable, audio, up to 1080p\n"
"- 'veo-3.0-fast-generate-001': Veo 3 Fast stable\n"
"- 'veo-2.0-generate-001': Veo 2 stable, silent, 720p only"
),
},
"aspect_ratio": {
"type": "string",
"description": (
"Video aspect ratio. '16:9' (landscape, default) or "
"'9:16' (portrait/vertical, ideal for mobile/social)."
),
},
# The 8-second and 4k-model rules stated here are enforced by _validate_params.
"resolution": {
"type": "string",
"description": (
"Output resolution. '720p' (default), "
"'1080p' (requires duration_seconds=8), "
"'4k' (Veo 3.1 & Veo 3 only, requires duration_seconds=8). "
"Video extension is limited to 720p."
),
},
# NOTE(review): the per-model allowed values (4/6/8 vs 5/6/8) are only
# documented here; _validate_params checks just the "must be 8" cases.
"duration_seconds": {
"type": "integer",
"description": (
"Video length in seconds. Veo 3.x: 4, 6, or 8 (default 8). "
"Veo 2: 5, 6, or 8. "
"1080p and 4k resolutions require 8s. "
"Reference images and video extension also require 8s."
),
},
"image_url": {
"type": "string",
"description": (
"URL of a starting image to animate (image-to-video). "
"Also serves as the first frame when last_frame_url is set."
),
},
"last_frame_url": {
"type": "string",
"description": (
"URL of the desired final frame (Veo 3.1 & Fast, Veo 3.x, Veo 2). "
"Must be used together with image_url (the first frame). "
"The model interpolates the video between the two frames."
),
},
# _build_instance uses at most the first three URLs of this list.
"reference_image_urls": {
"type": "array",
"items": {"type": "string"},
"description": (
"1–3 reference image URLs to guide the video's content "
"(Veo 3.1 & Fast only). Preserves the appearance of a "
"subject (person, character, product) across the video."
),
},
"previous_video_url": {
"type": "string",
"description": (
"URL of a previously Veo-generated MP4 to extend "
"(Veo 3.1 & Fast only, 720p only). "
"Must have been generated within the last 2 days. "
"Each extension adds ~7 seconds; up to 20 extensions allowed."
),
},
"negative_prompt": {
"type": "string",
"description": (
"Elements to avoid in the video. "
"Describe what you don't want as nouns/adjectives "
"(e.g. 'text overlays, logos, blurry motion') "
"rather than using 'no' or 'don't'."
),
},
# Passed through to the API unchanged; this module does not validate the value.
"person_generation": {
"type": "string",
"description": (
"Controls whether people appear in the video. "
"'allow_all': all ages (text-to-video & extension only). "
"'allow_adult': adults only (default for image/reference inputs). "
"'dont_allow': no people (Veo 2 text-to-video only). "
"In EU/UK/CH/MENA regions 'allow_adult' is the maximum."
),
},
"seed": {
"type": "integer",
"description": (
"Random seed for slight output consistency (Veo 3.x). "
"Doesn't guarantee identical results but helps reproducibility."
),
},
},
"required": ["prompt"],
}
async def _resolve_api_key(ctx: ToolContext | None) -> tuple[str, bool]:
    """Pick the Gemini API key to use for this request.

    When ``ctx`` carries a truthy ``user_id``, the per-user ``"gemini"`` key
    is requested from ``tools.manage_api_keys.get_user_api_key`` (imported
    lazily), passing along whatever ``redis``, ``channel_id`` and ``config``
    attributes the context exposes. A non-empty result is returned as the
    user's own key. In every other situation -- no context, no user id, an
    empty lookup result, or any exception during import/lookup (logged as a
    warning and swallowed) -- the module-level ``FALLBACK_API_KEY`` is
    returned instead.

    Args:
        ctx: The current tool context, or ``None``.

    Returns:
        ``(api_key, using_own_key)``. The flag is ``True`` only when the key
        came from the per-user lookup; ``run`` applies the shared-key daily
        limit when it is ``False``.
    """
    has_user = ctx is not None and bool(getattr(ctx, "user_id", None))
    if not has_user:
        return FALLBACK_API_KEY, False

    try:
        from tools.manage_api_keys import get_user_api_key

        own_key = await get_user_api_key(
            ctx.user_id,
            "gemini",
            redis_client=getattr(ctx, "redis", None),
            channel_id=getattr(ctx, "channel_id", None),
            config=getattr(ctx, "config", None),
        )
    except Exception as exc:
        # Best effort: a broken key store must not block use of the shared key.
        logger.warning("Failed to resolve user Gemini key: %s", exc)
        return FALLBACK_API_KEY, False

    if own_key:
        return own_key, True
    return FALLBACK_API_KEY, False
async def _fetch_url_bytes(url: str, timeout: float = 60.0) -> bytes | None:
    """Fetch the body of ``url`` as bytes, or ``None`` on any failure.

    The URL is stripped of surrounding whitespace and passed through
    ``tools._safe_http.assert_safe_http_url``; the value that helper returns
    is the one actually requested. The GET itself goes through
    ``safe_httpx_client`` / ``safe_http_request`` with at most five
    redirects. Nothing is raised for the expected failure modes: a rejected
    URL, a non-200 status, and any exception during the request are each
    logged as a warning and reported as ``None``.

    Args:
        url: The media URL to download.
        timeout: Timeout in seconds handed to the HTTP client (default 60).

    Returns:
        The response content on HTTP 200, otherwise ``None``.
    """
    from tools._safe_http import (
        assert_safe_http_url,
        safe_http_request,
        safe_httpx_client,
    )

    try:
        checked_url = assert_safe_http_url(url.strip())
    except (ValueError, ImportError) as exc:
        # Log the caller-supplied URL: no validated form exists at this point.
        logger.warning("Blocked or invalid URL (%s): %s", url, exc)
        return None

    try:
        async with safe_httpx_client(timeout=timeout) as client:
            response = await safe_http_request(
                client, "GET", checked_url, max_redirects=5
            )
            if response.status_code == 200:
                return response.content
            logger.warning(
                "Fetch returned %d for %s", response.status_code, checked_url
            )
            return None
    except Exception as exc:
        logger.warning("Fetch error for %s: %s", checked_url, exc)
        return None
def _b64(data: bytes) -> str:
    """Return the standard base64 encoding of ``data`` as text.

    Used when media bytes have to be embedded inline in the JSON request
    body. Pure function: no I/O and no side effects.

    Args:
        data: Raw bytes to encode.

    Returns:
        The base64 representation as a ``str``.
    """
    encoded = base64.b64encode(data)
    return str(encoded, "utf-8")
def _image_inline(data: bytes, mime: str) -> dict[str, Any]:
    """Build the ``inlineData`` wrapper for an embedded image.

    The bytes are base64-encoded with ``_b64`` and paired with the given MIME
    type. ``_build_instance`` places the result under ``image``,
    ``lastFrame`` or inside each ``referenceImages`` entry. Pure function.

    Args:
        data: Raw image bytes.
        mime: MIME type to declare for the image, e.g. ``"image/png"``.

    Returns:
        ``{"inlineData": {"mimeType": mime, "data": <base64 text>}}``.
    """
    inline_payload = {"mimeType": mime, "data": _b64(data)}
    return {"inlineData": inline_payload}
def _sniff_image_mime(data: bytes, fallback: str = "image/jpeg") -> str:
    """Guess an image's MIME type from its magic bytes.

    Recognises the PNG signature, both GIF signatures (``GIF87a`` and
    ``GIF89a``) and the RIFF/WEBP container header. Only the first twelve
    bytes are inspected and nothing is decoded. Anything else -- including
    empty or truncated input -- is reported as ``fallback``.

    Args:
        data: Raw image bytes.
        fallback: Value returned when no signature matches (default
            ``"image/jpeg"``).

    Returns:
        ``"image/png"``, ``"image/gif"``, ``"image/webp"`` or ``fallback``.
    """
    # Formats identified purely by a fixed leading byte sequence.
    leading_signatures = (
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
    )
    for magic, mime in leading_signatures:
        if data[: len(magic)] == magic:
            return mime

    # WebP: "RIFF", four size bytes, then "WEBP".
    looks_like_webp = data[:4] == b"RIFF" and data[8:12] == b"WEBP"
    return "image/webp" if looks_like_webp else fallback
async def _build_instance(
    prompt: str,
    image_url: str | None,
    last_frame_url: str | None,
    reference_image_urls: list[str] | None,
    previous_video_url: str | None,
    model: str,
) -> tuple[dict[str, Any] | None, str | None]:
    """Assemble the single Veo ``instances`` entry, downloading any media.

    The instance always contains ``prompt``. Depending on the arguments it
    may also carry, in this key order:

    * ``image`` -- the starting image / first frame (``image_url``);
    * ``lastFrame`` -- the interpolation end frame (``last_frame_url``,
      which needs ``image_url`` and a model in ``_LAST_FRAME_MODELS``);
    * ``referenceImages`` -- up to the first three ``reference_image_urls``
      (model must be in ``_REFERENCE_IMAGE_MODELS``), each tagged with
      ``referenceType`` ``"asset"``;
    * ``video`` -- a previous MP4 to extend (``previous_video_url``, model
      must be in ``_EXTENSION_MODELS``).

    All option/model compatibility checks run *before* any download, so an
    unsupported combination is rejected without spending time or bandwidth
    on media that would be thrown away. Media is then fetched with
    ``_fetch_url_bytes``; images are typed with ``_sniff_image_mime`` and
    wrapped by ``_image_inline``. A failed download of the starting image,
    last frame, or previous video is an error. A reference image that fails
    to download is logged and skipped, and if none survive the
    ``referenceImages`` key is omitted.

    Args:
        prompt: Text description placed at the root of the instance.
        image_url: Optional starting image / first-frame URL.
        last_frame_url: Optional final-frame URL for interpolation.
        reference_image_urls: Optional subject reference image URLs; only
            the first three are used.
        previous_video_url: Optional URL of a previous MP4 to extend.
        model: Resolved Veo model id, used for the feature-support checks.

    Returns:
        ``(instance, None)`` on success, or ``(None, error_message)`` when an
        option is unsupported or a required download fails.
    """
    # --- Offline validation: fail fast, before touching the network -------
    if last_frame_url:
        if not image_url:
            return None, (
                "last_frame_url requires image_url (the first frame) to be set. "
                "Both must be provided for frame interpolation."
            )
        if model not in _LAST_FRAME_MODELS:
            return None, f"Frame interpolation is not supported by model '{model}'."
    if reference_image_urls and model not in _REFERENCE_IMAGE_MODELS:
        return None, (
            f"Reference images are not supported by model '{model}'. "
            f"Use 'veo-3.1-generate-preview' or 'veo-3.1-fast-generate-preview'."
        )
    if previous_video_url and model not in _EXTENSION_MODELS:
        return None, (
            f"Video extension is not supported by model '{model}'. "
            f"Use 'veo-3.1-generate-preview' or 'veo-3.1-fast-generate-preview'."
        )

    instance: dict[str, Any] = {"prompt": prompt}

    # Starting image / first frame
    if image_url:
        raw = await _fetch_url_bytes(image_url)
        if raw is None:
            return None, f"Could not download starting image from: {image_url}"
        instance["image"] = _image_inline(raw, _sniff_image_mime(raw))

    # Last frame (interpolation); image_url presence was verified above
    if last_frame_url:
        raw = await _fetch_url_bytes(last_frame_url)
        if raw is None:
            return None, f"Could not download last-frame image from: {last_frame_url}"
        instance["lastFrame"] = _image_inline(raw, _sniff_image_mime(raw))

    # Reference images: at most three, individual failures are tolerated
    if reference_image_urls:
        refs: list[dict[str, Any]] = []
        for ref_url in reference_image_urls[:3]:
            raw = await _fetch_url_bytes(ref_url)
            if raw is None:
                logger.warning(
                    "Skipping reference image that failed to download: %s", ref_url
                )
                continue
            refs.append(
                {
                    "image": _image_inline(raw, _sniff_image_mime(raw)),
                    "referenceType": "asset",
                }
            )
        if refs:
            instance["referenceImages"] = refs

    # Video extension: the previous clip is inlined as base64 MP4
    if previous_video_url:
        # Videos are larger than images, hence the longer download timeout.
        raw = await _fetch_url_bytes(previous_video_url, timeout=120.0)
        if raw is None:
            return None, f"Could not download previous video from: {previous_video_url}"
        instance["video"] = {"inlineData": {"mimeType": "video/mp4", "data": _b64(raw)}}

    return instance, None
async def _start_generation(
    instance: dict[str, Any],
    model: str,
    api_key: str,
    aspect_ratio: str,
    resolution: str,
    duration_seconds: int | None,
    negative_prompt: str | None,
    person_generation: str | None,
    seed: int | None,
) -> dict[str, Any]:
    """Submit the generation job and return its operation name.

    POSTs ``{"instances": [instance], "parameters": {...}}`` to the model's
    ``:predictLongRunning`` endpoint under ``GEMINI_API_BASE``, authenticated
    with the ``x-goog-api-key`` header, using ``safe_httpx_client`` /
    ``safe_http_request`` (90 s timeout, at most five redirects).

    ``parameters`` always holds ``aspectRatio``, ``resolution`` and
    ``numberOfVideos`` (fixed at 1). ``durationSeconds`` (sent as a string)
    and ``seed`` are added when not ``None``; ``negativePrompt`` and
    ``personGeneration`` are added when non-empty.

    The function only starts the job; it does not wait for the video. It
    never raises for request problems: a non-200 status, a response without a
    ``name``, or any exception is logged and/or reported through the
    ``error`` field.

    Args:
        instance: The assembled instance dict from ``_build_instance``.
        model: Veo model id; forms part of the endpoint path.
        api_key: Gemini API key for the request header.
        aspect_ratio: Output aspect ratio.
        resolution: Output resolution.
        duration_seconds: Optional clip length in seconds.
        negative_prompt: Optional description of elements to exclude.
        person_generation: Optional people-in-video policy.
        seed: Optional random seed.

    Returns:
        ``{"operation_name": <str>, "error": None}`` on success, otherwise
        ``{"operation_name": None, "error": <message>}``.
    """
    from tools._safe_http import safe_http_request, safe_httpx_client

    def _failed(message: str) -> dict[str, Any]:
        return {"operation_name": None, "error": message}

    # Optional settings in request order; None marks "leave out of the request".
    optional_parameters = {
        "durationSeconds": None if duration_seconds is None else str(duration_seconds),
        "negativePrompt": negative_prompt or None,
        "personGeneration": person_generation or None,
        "seed": seed,
    }
    parameters: dict[str, Any] = {
        "aspectRatio": aspect_ratio,
        "resolution": resolution,
        "numberOfVideos": 1,
    }
    parameters.update(
        {key: value for key, value in optional_parameters.items() if value is not None}
    )

    request_body = {"instances": [instance], "parameters": parameters}
    request_headers = {"x-goog-api-key": api_key, "Content-Type": "application/json"}
    url = f"{GEMINI_API_BASE}/models/{model}:predictLongRunning"

    try:
        async with safe_httpx_client(timeout=90.0) as http:
            resp = await safe_http_request(
                http,
                "POST",
                url,
                headers=request_headers,
                json=request_body,
                max_redirects=5,
            )
            if resp.status_code != 200:
                # Cap the echoed body so a large error page cannot flood logs/results.
                detail = resp.text[:800]
                logger.error("Veo start error %d: %s", resp.status_code, detail)
                return _failed(f"Veo API error ({resp.status_code}): {detail}")

            body = resp.json()
            operation = body.get("name")
            if operation:
                return {"operation_name": operation, "error": None}
            return _failed(
                f"API returned no operation name. Response: {str(body)[:400]}"
            )
    except Exception as exc:
        logger.error("Veo start request failed: %s", exc, exc_info=True)
        return _failed(f"Request failed: {exc}")
async def _poll_until_done(
    operation_name: str,
    api_key: str,
) -> dict[str, Any]:
    """Poll a Veo long-running operation until it finishes or times out.

    Sleeps ``_POLL_INTERVAL_SECONDS`` (or whatever time is left, if shorter),
    then GETs ``{GEMINI_API_BASE}/{operation_name}`` with the
    ``x-goog-api-key`` header through ``safe_httpx_client`` /
    ``safe_http_request``. This repeats until the operation reports ``done``
    or ``_MAX_POLL_SECONDS`` of wall-clock time have passed.

    The ceiling is measured with the event loop's monotonic clock, so time
    spent inside slow or timing-out poll requests counts towards it. Counting
    only the sleep intervals would let a run of 30-second request timeouts
    stretch the real wait far beyond the advertised limit.

    Non-200 responses, request exceptions, and bodies that are not a JSON
    object are logged (where applicable) and retried on the next tick. Once
    the operation is done, the first generated sample's video URI is
    extracted. An API ``error`` block, an empty sample list, or a missing URI
    each map to a specific error message.

    Args:
        operation_name: The long-running operation id returned by
            ``_start_generation``.
        api_key: Gemini API key for the status requests.

    Returns:
        A dict with ``video_uri``, ``error`` and ``timed_out``. On success
        ``video_uri`` is set and ``error`` is ``None``. On failure
        ``video_uri`` is ``None`` and ``error`` holds a message;
        ``timed_out`` is ``True`` only when the ceiling was reached.
    """
    from tools._safe_http import safe_http_request, safe_httpx_client

    def _failure(message: str, timed_out: bool = False) -> dict[str, Any]:
        return {"video_uri": None, "error": message, "timed_out": timed_out}

    poll_url = f"{GEMINI_API_BASE}/{operation_name}"
    headers = {"x-goog-api-key": api_key}

    loop = asyncio.get_running_loop()
    deadline = loop.time() + _MAX_POLL_SECONDS

    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            break
        # Never sleep past the deadline; the last tick still gets one poll.
        await asyncio.sleep(min(_POLL_INTERVAL_SECONDS, remaining))

        try:
            async with safe_httpx_client(timeout=30.0) as http:
                resp = await safe_http_request(
                    http,
                    "GET",
                    poll_url,
                    headers=headers,
                    max_redirects=5,
                )
                if resp.status_code != 200:
                    logger.warning(
                        "Veo poll error %d: %s",
                        resp.status_code,
                        resp.text[:200],
                    )
                    continue
                data = resp.json()
        except Exception as exc:
            logger.warning("Veo poll request failed: %s", exc)
            continue

        # A body that is not a JSON object cannot say "done"; poll again.
        if not isinstance(data, dict) or not data.get("done"):
            continue

        error_block = data.get("error")
        if error_block:
            if isinstance(error_block, dict):
                msg = error_block.get("message", "Unknown error from Veo API.")
            else:
                msg = str(error_block)
            return _failure(msg)

        # Each level may be missing or null, so fall back to an empty value.
        response = data.get("response") or {}
        video_response = response.get("generateVideoResponse") or {}
        samples = video_response.get("generatedSamples") or []
        if not samples:
            return _failure(
                "Generation completed but no video samples were returned (may have been blocked by safety filters)."
            )

        video_uri = (samples[0].get("video") or {}).get("uri")
        if not video_uri:
            return _failure(
                "Generation completed but no video URI was found in the response."
            )
        return {"video_uri": video_uri, "error": None, "timed_out": False}

    return _failure(
        f"Video generation timed out after {_MAX_POLL_SECONDS // 60} minutes.",
        timed_out=True,
    )
async def _download_video(uri: str, api_key: str) -> bytes | None:
    """Fetch the finished video from ``uri``.

    Issues a GET through ``safe_httpx_client`` / ``safe_http_request`` with a
    180-second timeout, at most five redirects, and the ``x-goog-api-key``
    header. Failures are never raised: a non-200 status or any exception is
    logged at error level and reported as ``None``.

    Args:
        uri: Video URI produced by ``_poll_until_done``.
        api_key: Gemini API key sent with the download request.

    Returns:
        The response content on HTTP 200, otherwise ``None``.
    """
    from tools._safe_http import safe_http_request, safe_httpx_client

    auth_headers = {"x-goog-api-key": api_key}
    try:
        async with safe_httpx_client(timeout=180.0) as http:
            resp = await safe_http_request(
                http,
                "GET",
                uri,
                headers=auth_headers,
                max_redirects=5,
            )
            if resp.status_code == 200:
                return resp.content
            logger.error(
                "Generated video download error %d: %s",
                resp.status_code,
                resp.text[:300],
            )
            return None
    except Exception as exc:
        logger.error("Generated video download failed: %s", exc, exc_info=True)
        return None
def _validate_params(
    model: str,
    resolution: str,
    duration_seconds: int | None,
    previous_video_url: str | None,
    reference_image_urls: list[str] | None,
    last_frame_url: str | None,
) -> str | None:
    """Check option combinations that can be rejected without any I/O.

    Rules, evaluated in this order (the first violation is reported):

    1. ``1080p`` and ``4k`` need ``duration_seconds`` of 8.
    2. ``4k`` needs a model in ``_4K_MODELS``.
    3. Video extension (``previous_video_url``) needs ``720p``.
    4. Video extension needs ``duration_seconds`` of 8.
    5. Reference images need ``duration_seconds`` of 8.
    6. Frame interpolation (``last_frame_url``) needs ``duration_seconds``
       of 8.

    A ``duration_seconds`` of ``None`` never violates an 8-second rule.

    Args:
        model: Resolved Veo model id.
        resolution: Requested output resolution.
        duration_seconds: Requested clip length, or ``None`` if unspecified.
        previous_video_url: Truthy when the request is a video extension.
        reference_image_urls: Truthy when reference images are supplied.
        last_frame_url: Truthy when frame interpolation is requested.

    Returns:
        ``None`` when every rule holds, otherwise the error message for the
        first rule that is broken.
    """
    explicit_non_eight = duration_seconds is not None and duration_seconds != 8
    is_high_res = resolution in ("1080p", "4k")

    if is_high_res and explicit_non_eight:
        return f"Resolution '{resolution}' requires duration_seconds=8."

    if resolution == "4k" and model not in _4K_MODELS:
        return (
            f"4k resolution is not supported by model '{model}'. "
            f"Use veo-3.1-generate-preview, veo-3.1-fast-generate-preview, "
            f"veo-3.0-generate-001, or veo-3.0-fast-generate-001."
        )

    if previous_video_url:
        if resolution != "720p":
            return "Video extension only supports 720p resolution."
        if explicit_non_eight:
            return "Video extension requires duration_seconds=8."

    if explicit_non_eight:
        if reference_image_urls:
            return "Reference image generation requires duration_seconds=8."
        if last_frame_url:
            return "Frame interpolation (last_frame_url) requires duration_seconds=8."

    return None
# Public entry point of the tool.
async def run(
    prompt: str,
    model: str = DEFAULT_MODEL,
    aspect_ratio: str = "16:9",
    resolution: str = "720p",
    duration_seconds: int | None = None,
    image_url: str | None = None,
    last_frame_url: str | None = None,
    reference_image_urls: list[str] | None = None,
    previous_video_url: str | None = None,
    negative_prompt: str | None = None,
    person_generation: str | None = None,
    seed: int | None = None,
    ctx: "ToolContext | None" = None,
) -> str:
    """Generate a Veo video and post the MP4 to the current channel.

    Steps, in order:

    1. Require a context with a platform adapter.
    2. Replace an unrecognised ``model``, ``aspect_ratio`` or ``resolution``
       with its default (no error is raised for these).
    3. Cross-check the options with ``_validate_params``.
    4. Resolve the API key. On the shared key, consult the
       ``tools.manage_api_keys`` limit helpers and stop if the daily
       allowance (``_VIDEO_DAILY_LIMIT``) is used up.
    5. Build the request instance (downloads input media), start the job,
       poll it to completion and download the resulting MP4.
    6. Send the file through ``ctx.adapter.send_file`` and record it in
       ``ctx.sent_files``.
    7. On the shared key, bump the usage counter. This happens only after
       the upload succeeded.

    Every failure along the way is returned as a JSON object with an
    ``error`` field rather than raised.

    Args:
        prompt: Text description of the video.
        model: Veo model id; unknown values fall back to ``DEFAULT_MODEL``.
        aspect_ratio: ``'16:9'`` or ``'9:16'``; anything else becomes
            ``'16:9'``.
        resolution: ``'720p'``, ``'1080p'`` or ``'4k'``; anything else
            becomes ``'720p'``.
        duration_seconds: Optional clip length in seconds.
        image_url: Optional starting image / first-frame URL.
        last_frame_url: Optional final-frame URL (requires ``image_url``).
        reference_image_urls: Optional reference image URLs.
        previous_video_url: Optional URL of a previous video to extend.
        negative_prompt: Optional description of elements to exclude.
        person_generation: Optional people-in-video policy.
        seed: Optional random seed.
        ctx: Tool execution context.

    Returns:
        A JSON string. On success it contains ``success``, ``model``,
        ``filename``, ``resolution``, ``aspect_ratio`` and ``size_bytes``,
        plus ``duration_seconds`` and ``file_url`` when those are truthy. On
        failure it contains ``error`` and, depending on the stage,
        ``timed_out`` / ``operation_name`` or ``video_uri``. When the shared
        key's limit is hit, the value produced by
        ``default_key_limit_error`` is returned as-is.
    """
    if ctx is None or ctx.adapter is None:
        return json.dumps({"error": "No platform adapter available."})

    # Unrecognised option values silently fall back to the defaults.
    model = model if model in VALID_MODELS else DEFAULT_MODEL
    aspect_ratio = aspect_ratio if aspect_ratio in ("16:9", "9:16") else "16:9"
    resolution = resolution if resolution in ("720p", "1080p", "4k") else "720p"

    # Offline cross-checks come first so bad requests cost nothing.
    problem = _validate_params(
        model,
        resolution,
        duration_seconds,
        previous_video_url,
        reference_image_urls,
        last_frame_url,
    )
    if problem:
        return json.dumps({"error": problem})

    api_key, using_own_key = await _resolve_api_key(ctx)

    # Shared-key quota gate; default_key_limit_applies decides who is exempt.
    if not using_own_key:
        from tools.manage_api_keys import (
            check_default_key_limit,
            default_key_limit_applies,
            default_key_limit_error,
        )

        if await default_key_limit_applies(ctx):
            allowed, used_today, daily_cap = await check_default_key_limit(
                ctx.user_id,
                TOOL_NAME,
                ctx.redis,
                daily_limit=_VIDEO_DAILY_LIMIT,
            )
            if not allowed:
                return default_key_limit_error(TOOL_NAME, used_today, daily_cap)

    # Assemble the request instance; this downloads any input media.
    instance, build_error = await _build_instance(
        prompt=prompt,
        image_url=image_url,
        last_frame_url=last_frame_url,
        reference_image_urls=reference_image_urls,
        previous_video_url=previous_video_url,
        model=model,
    )
    if build_error:
        return json.dumps({"error": build_error})

    # Kick off the long-running job.
    started = await _start_generation(
        instance=instance,
        model=model,
        api_key=api_key,
        aspect_ratio=aspect_ratio,
        resolution=resolution,
        duration_seconds=duration_seconds,
        negative_prompt=negative_prompt,
        person_generation=person_generation,
        seed=seed,
    )
    if started["error"]:
        return json.dumps({"error": started["error"]})
    operation_name = started["operation_name"]

    # Wait for completion; the operation name is echoed back on failure.
    polled = await _poll_until_done(operation_name, api_key)
    if polled["error"]:
        failure = {
            "error": polled["error"],
            "timed_out": polled.get("timed_out", False),
            "operation_name": operation_name,
        }
        return json.dumps(failure)
    video_uri = polled["video_uri"]

    # Pull down the generated MP4.
    video_bytes = await _download_video(video_uri, api_key)
    if not video_bytes:
        failure = {
            "error": "Video generation succeeded but the file could not be downloaded.",
            "video_uri": video_uri,
        }
        return json.dumps(failure)

    # The file name is derived from the content hash of the video.
    digest = hashlib.sha256(video_bytes).hexdigest()[:16]
    fname = f"veo_{digest}.mp4"
    try:
        file_url = await ctx.adapter.send_file(
            ctx.channel_id,
            video_bytes,
            fname,
            "video/mp4",
        )
        sent_record = {
            "data": video_bytes,
            "filename": fname,
            "mimetype": "video/mp4",
            "file_url": file_url or "",
        }
        ctx.sent_files.append(sent_record)
    except Exception as exc:
        logger.error("Failed to send Veo video file: %s", exc, exc_info=True)
        failure = {
            "error": f"Video generated but channel upload failed: {exc}",
            "video_uri": video_uri,
        }
        return json.dumps(failure)

    # Count the generation against the shared key only once it was delivered.
    if not using_own_key:
        from tools.manage_api_keys import (
            default_key_limit_applies,
            increment_default_key_usage,
        )

        if await default_key_limit_applies(ctx):
            await increment_default_key_usage(ctx.user_id, TOOL_NAME, ctx.redis)

    summary: dict[str, Any] = {
        "success": True,
        "model": model,
        "filename": fname,
        "resolution": resolution,
        "aspect_ratio": aspect_ratio,
        "size_bytes": len(video_bytes),
    }
    if duration_seconds:
        summary["duration_seconds"] = duration_seconds
    if file_url:
        summary["file_url"] = file_url
    return json.dumps(summary)