"""Analyse a video via the Gemini API — YouTube (native ingestion), Rumble,
Twitch, direct MP4, and 1000+ other sites via yt-dlp."""
import asyncio
import json
import logging
import mimetypes
import os
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
from urllib.parse import unquote, urlparse
import aiofiles
import aiohttp
from google import genai
from google.genai import types
from gemini_embed_pool import mark_key_daily_spent, next_gemini_flash_key
from url_utils import YTDLP_METADATA_NETWORK_ARGS, parse_ytdlp_dump_json_stdout
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Hard cap on analysable video length: 1 hour (enforced in run()).
MAX_DURATION_SECONDS = 3600
# Primary model; FALLBACK_MODELS are tried in order on retriable errors.
DEFAULT_MODEL = "gemini-3.1-flash-lite-preview"
FALLBACK_MODELS = ["gemini-3-flash-preview", "gemini-3.1-pro-preview"]
# Size cap applied to both yt-dlp and direct HTTP downloads.
MAX_DOWNLOAD_SIZE = 200 * 1024 * 1024 # 200 MB
# Wall-clock limit (seconds) for a single download attempt.
DOWNLOAD_TIMEOUT = 300
# Cookie jar handed to yt-dlp for both metadata and download calls.
YTDLP_COOKIES = "/root/cookies.txt"
# Prefer <=720p streams to keep downloads under MAX_DOWNLOAD_SIZE.
YTDLP_FORMAT_SELECTOR = (
    "bestvideo[height<=720]+bestaudio/best[height<=720]/best"
)
# Polling cadence/limit while waiting for an uploaded file to become ACTIVE.
GEMINI_UPLOAD_POLL_INTERVAL = 10 # seconds
GEMINI_UPLOAD_TIMEOUT = 300 # 5 minutes
# URL path extensions treated as directly-downloadable video files.
VIDEO_EXTENSIONS = frozenset({
    ".mp4", ".webm", ".mkv", ".mov", ".avi", ".flv",
    ".wmv", ".m4v", ".ts", ".mpeg", ".mpg", ".3gp",
})
# System-level persona instruction sent with every Gemini request
# (see _generate_description). Runtime prompt text — do not edit casually.
SYSTEM_INSTRUCTION = """You are an expert video analyst, content researcher, and transcriptionist. You provide both an executive summary for quick understanding AND exhaustive, highly detailed descriptions that capture everything happening in a video, along with deeper insights that most viewers would miss.
You watch videos with the eye of a film critic, the curiosity of a researcher, and the attention to detail of an investigative journalist. You notice subtle details: background elements, editing choices, body language, tone shifts, implied meanings, cultural references, and connections to broader topics.
Your output should serve two purposes: (1) let someone quickly grasp the essence of a video, and (2) provide enough depth that they could understand not just WHAT happened, but HOW it was presented, WHY certain choices were made, and WHAT deeper meanings or implications exist.
When dialogue or narration is present, you transcribe it VERBATIM whenever possible. You capture not just the gist, but the exact words spoken. Your transcripts preserve filler words, false starts, and natural speech patterns to give an authentic record of what was said.
LENGTH IS NOT A CONSTRAINT. You have a 65,000 token output budget. Use as much space as needed to be thorough. Never truncate, abbreviate, or skip details to save space. More detail is always better. Err on the side of being too comprehensive rather than too brief.
You dig deep. You connect dots. You provide value beyond what's obvious."""
# User-turn analysis template. run() prepends a metadata section and may
# append a focus-area section around this text.
ANALYSIS_PROMPT = """Provide an exhaustive, deeply detailed analysis of this video.
## EXECUTIVE SUMMARY
Start with a summary (5-8 sentences) that captures:
- What this video is about and its main topic/purpose
- The key takeaway or central message
- Who made it and who it's for
- Why it matters or what makes it notable
---
## FULL VIDEO DESCRIPTION
Walk through the ENTIRE video chronologically with granular detail:
- Describe every segment, scene, and transition
- Note exactly what is shown visually at each moment (settings, objects, people, text overlays, graphics)
- Capture what is said, including notable phrasing, tone, and delivery
- Include timestamps (MM:SS) throughout to anchor your description
- Don't skip anything—even "minor" moments often contain important context
## VISUAL & PRODUCTION ANALYSIS
- Camera work: shots, angles, movements, framing choices
- Editing style: pacing, cuts, transitions, rhythm
- Graphics, animations, text overlays, and their timing
- Color grading, lighting, visual mood
- B-roll footage and how it's used
- Thumbnail and title analysis (if visible/relevant)
## TRANSCRIPT / DIALOGUE
Provide a full verbatim transcript of all spoken content in the video, organized chronologically with timestamps:
- Transcribe ALL dialogue, narration, and spoken content word-for-word
- Include speaker identification where multiple speakers are present
- Note filler words, false starts, laughter, and other vocal elements in [brackets]
- Use timestamps (MM:SS) to anchor each segment of speech
- If the video is very long, prioritize completeness over brevity — capture everything said
- For non-English content, provide the original language plus an English translation
## AUDIO & PRODUCTION ANALYSIS
- Speaking style, tone, energy, and how it shifts throughout
- Background music/sound design and its emotional effect
- Pauses, emphasis, and rhetorical techniques
- Sound mixing choices and audio quality
## DEEPER INSIGHTS & NON-OBVIOUS OBSERVATIONS
This is crucial—provide analysis that goes beyond what's surface-level:
- What is the creator's underlying message or agenda (stated or unstated)?
- What persuasion techniques or narrative structures are being used?
- What assumptions does the video make about its audience?
- What context (cultural, historical, industry-specific) helps understand this content?
- What biases or perspectives are present?
- What questions does this video raise but not answer?
- How does this connect to broader trends, debates, or topics?
- What might a casual viewer miss that's actually significant?
## CONTENT STRUCTURE & STRATEGY
- How is the video structured? What's the narrative arc?
- How does it hook viewers and maintain attention?
- What calls-to-action exist (explicit or implicit)?
- How does it compare to typical content in this genre/niche?
## CREATOR & CONTEXT
- Who made this and what's their background/credibility?
- What's the apparent purpose (educate, entertain, persuade, sell)?
- Who is the target audience and how can you tell?
Be extremely thorough. Length is not a concern—you have a large output budget, so use it. Your analysis should be comprehensive enough that someone could understand this video in rich detail without ever watching it. Include specific examples, timestamps, and direct observations rather than vague generalizations. Do not truncate or abbreviate any section."""
# ---------------------------------------------------------------------------
# URL helpers
# ---------------------------------------------------------------------------
def _is_youtube_url(url: str) -> bool:
if not url:
return False
url_lower = url.lower().strip()
return (
"youtube.com/watch" in url_lower
or "youtu.be/" in url_lower
or "youtube.com/shorts" in url_lower
)
def _extract_video_id(url: str) -> Optional[str]:
patterns = [
r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})",
r"(?:youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
r"(?:youtube\.com/v/)([a-zA-Z0-9_-]{11})",
]
for pattern in patterns:
match = re.search(pattern, url)
if match:
return match.group(1)
return None
def _is_direct_video_url(url: str) -> bool:
    """Return True if the URL path ends with a known video extension."""
    _, extension = os.path.splitext(urlparse(url).path)
    return extension.lower() in VIDEO_EXTENSIONS
def _classify_url(url: str) -> str:
    """Classify a URL into ``'youtube'``, ``'direct'``, or ``'ytdlp'``."""
    if _is_youtube_url(url):
        return "youtube"
    # Anything that isn't YouTube or a raw video file goes through yt-dlp.
    return "direct" if _is_direct_video_url(url) else "ytdlp"
# ---------------------------------------------------------------------------
# Metadata & FPS (shared — yt-dlp supports most sites)
# ---------------------------------------------------------------------------
def _calculate_fps(duration_seconds: int) -> float:
"""Calculate appropriate FPS based on video duration.
Shorter videos get higher FPS for more detail.
Longer videos get lower FPS to manage token usage.
"""
if duration_seconds <= 60:
return 4
elif duration_seconds <= 300:
return 2
elif duration_seconds <= 900:
return 1.5
elif duration_seconds <= 1800:
return 1
else:
return 0.5
def _get_video_metadata(url: str) -> Optional[dict]:
    """Get video metadata using yt-dlp.

    Works for YouTube, Rumble, Twitch, and most sites yt-dlp supports.
    Blocking — call via ``asyncio.to_thread``.

    Args:
        url: Video page URL.

    Returns:
        A normalized dict (title, channel, duration, upload_date, counts,
        description, tags, categories, extractor, upload_date_formatted)
        or ``None`` on any failure — metadata is best-effort for callers.
    """
    try:
        result = subprocess.run(
            [
                "yt-dlp",
                "--cookies", YTDLP_COOKIES,
                "--dump-json",
                "--skip-download",
                "--no-warnings",
                "--no-playlist",
                *YTDLP_METADATA_NETWORK_ARGS,
                url,
            ],
            capture_output=True,
            text=True,
            timeout=48,
        )
        if result.returncode != 0:
            logger.warning("yt-dlp metadata failed (rc=%d): %s",
                           result.returncode, result.stderr[:200])
            return None
        info = parse_ytdlp_dump_json_stdout(result.stdout)
        if not info:
            logger.warning("yt-dlp metadata parse failed for %s", url)
            return None
        # BUG FIX: yt-dlp emits explicit JSON nulls for missing fields
        # (e.g. "duration": null on live streams), so dict.get(key, default)
        # still yields None and int(None) raised TypeError. Coalesce None
        # with ``or`` before converting/returning.
        metadata = {
            "title": info.get("title") or "Unknown",
            "channel": info.get("channel") or info.get("uploader") or "Unknown",
            "channel_id": info.get("channel_id") or "",
            "duration": int(info.get("duration") or 0),
            "upload_date": info.get("upload_date") or "",
            "view_count": info.get("view_count") or 0,
            "like_count": info.get("like_count") or 0,
            "description": info.get("description") or "",
            "tags": info.get("tags") or [],
            "categories": info.get("categories") or [],
            "extractor": info.get("extractor") or "",
        }
        # yt-dlp upload_date is YYYYMMDD; reformat as ISO for the prompt.
        if metadata["upload_date"] and len(metadata["upload_date"]) == 8:
            d = metadata["upload_date"]
            metadata["upload_date_formatted"] = f"{d[:4]}-{d[4:6]}-{d[6:8]}"
        else:
            metadata["upload_date_formatted"] = metadata["upload_date"]
        return metadata
    except subprocess.TimeoutExpired as exc:
        # TimeoutExpired may carry partial stderr as either str or bytes.
        tail = ""
        if exc.stderr:
            tail = (
                exc.stderr[-500:]
                if isinstance(exc.stderr, str)
                else exc.stderr.decode("utf-8", errors="replace")[-500:]
            )
        logger.warning(
            "yt-dlp metadata timed out for %s (partial stderr: %r)",
            url,
            tail.replace("\n", " ")[:350] if tail else "<empty>",
        )
        return None
    except Exception as e:
        logger.warning("yt-dlp metadata fetch failed: %s", e)
        return None
def _format_metadata_for_prompt(metadata: dict, source: str = "video platform") -> str:
"""Format video metadata as a string for inclusion in the prompt."""
duration = metadata["duration"]
duration_str = f"{duration // 60}:{duration % 60:02d}"
lines = [
f"Title: {metadata['title']}",
f"Channel: {metadata['channel']}",
f"Duration: {duration_str}",
]
if metadata.get("upload_date_formatted"):
lines.append(f"Upload Date: {metadata['upload_date_formatted']}")
if metadata.get("view_count"):
lines.append(f"Views: {metadata['view_count']:,}")
if metadata.get("like_count"):
lines.append(f"Likes: {metadata['like_count']:,}")
if metadata.get("tags"):
lines.append(f"Tags: {', '.join(metadata['tags'][:15])}")
if metadata.get("categories"):
lines.append(f"Categories: {', '.join(metadata['categories'])}")
if metadata.get("description"):
lines.append(f"\nVideo Description:\n{metadata['description']}")
return "\n".join(lines)
# ---------------------------------------------------------------------------
# Non-YouTube: download helpers
# ---------------------------------------------------------------------------
async def _download_with_ytdlp(
    url: str, temp_dir: str,
) -> tuple[Optional[str], Optional[str]]:
    """Download a video via yt-dlp. Returns ``(local_path, error)``.

    Exactly one tuple element is non-None. Size is capped both by yt-dlp's
    ``--max-filesize`` and a post-download check.
    """
    if not await asyncio.to_thread(shutil.which, "yt-dlp"):
        return None, "yt-dlp is not installed."
    template = os.path.join(temp_dir, "%(title).100s.%(ext)s")
    cmd = [
        "yt-dlp",
        "--cookies", YTDLP_COOKIES,
        "-f", YTDLP_FORMAT_SELECTOR,
        "-o", template,
        "--no-playlist",
        "--no-overwrites",
        "--restrict-filenames",
        "--max-filesize", str(MAX_DOWNLOAD_SIZE),
        # Have yt-dlp print the final path so we don't have to guess it.
        "--print", "after_move:filepath",
        url,
    ]
    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(), timeout=DOWNLOAD_TIMEOUT,
        )
        out = stdout.decode("utf-8", errors="replace").strip()
        err = stderr.decode("utf-8", errors="replace").strip()
        if proc.returncode != 0:
            msg = err or out or f"yt-dlp exit {proc.returncode}"
            return None, msg
        if out:
            last_line = out.strip().split("\n")[-1].strip()
            p = Path(last_line)
            if p.exists():
                size = p.stat().st_size
                if size > MAX_DOWNLOAD_SIZE:
                    p.unlink(missing_ok=True)
                    return None, (
                        f"Downloaded file too large "
                        f"({size / 1024 / 1024:.0f} MB > "
                        f"{MAX_DOWNLOAD_SIZE // 1024 // 1024} MB limit)."
                    )
                return str(p), None
        # Fallback: find any media file in temp_dir
        media_exts = {".mp4", ".webm", ".mkv", ".m4a", ".mp3", ".flv", ".avi"}
        for f in Path(temp_dir).iterdir():
            if f.is_file() and f.suffix.lower() in media_exts:
                return str(f), None
        return None, "Download completed but output file not found."
    except asyncio.TimeoutError:
        # BUG FIX: wait_for() cancels communicate() but does NOT stop the
        # child process — the original left yt-dlp running (and writing to
        # temp_dir) after a timeout. Kill it and reap it before returning.
        if proc is not None and proc.returncode is None:
            proc.kill()
            try:
                await proc.wait()
            except Exception:
                pass
        return None, f"Download timed out after {DOWNLOAD_TIMEOUT}s."
    except Exception as exc:
        return None, f"yt-dlp download error: {exc}"
async def _download_direct(
    url: str, temp_dir: str,
) -> tuple[Optional[str], Optional[str]]:
    """Download a direct video file via HTTP. Returns ``(local_path, error)``."""
    try:
        # Derive a filesystem-safe local filename from the URL path.
        raw_name = os.path.basename(unquote(urlparse(url).path)) or "video.mp4"
        safe_name = re.sub(r'[<>:"/\\|?*]', "_", raw_name)
        local_path = os.path.join(temp_dir, safe_name)
        client_timeout = aiohttp.ClientTimeout(total=DOWNLOAD_TIMEOUT)
        async with aiohttp.ClientSession(timeout=client_timeout) as session:
            async with session.get(url) as resp:
                if resp.status != 200:
                    return None, f"HTTP {resp.status}: {resp.reason}"
                # Reject oversized files up front when the server declares
                # a Content-Length.
                declared = resp.headers.get("Content-Length")
                if declared and int(declared) > MAX_DOWNLOAD_SIZE:
                    return None, (
                        f"File too large "
                        f"({int(declared) / 1024 / 1024:.0f} MB > "
                        f"{MAX_DOWNLOAD_SIZE // 1024 // 1024} MB limit)."
                    )
                # Stream to disk, re-checking the cap as bytes arrive in
                # case Content-Length was absent or understated.
                bytes_written = 0
                async with aiofiles.open(local_path, "wb") as out_file:
                    async for chunk in resp.content.iter_chunked(65536):
                        bytes_written += len(chunk)
                        if bytes_written > MAX_DOWNLOAD_SIZE:
                            return None, (
                                f"File exceeded "
                                f"{MAX_DOWNLOAD_SIZE // 1024 // 1024} MB "
                                f"during download."
                            )
                        await out_file.write(chunk)
                return local_path, None
    except asyncio.TimeoutError:
        return None, f"Download timed out after {DOWNLOAD_TIMEOUT}s."
    except Exception as exc:
        return None, f"HTTP download error: {exc}"
# ---------------------------------------------------------------------------
# Non-YouTube: Gemini File API upload
# ---------------------------------------------------------------------------
def _upload_to_gemini_sync(
    client: genai.Client, local_path: str,
) -> tuple[Optional[object], Optional[str]]:
    """Upload a local video to the Gemini File API and poll until ACTIVE.

    Blocking — call via ``asyncio.to_thread``.

    Returns ``(gemini_file, error)``; exactly one element is non-None.
    On processing failure or timeout the remote file is best-effort deleted.
    """
    import time
    # Fall back to video/mp4 when the extension is unknown to mimetypes.
    mime_type = mimetypes.guess_type(local_path)[0] or "video/mp4"
    try:
        uploaded = client.files.upload(
            file=local_path,
            config={"mime_type": mime_type},
        )
        logger.info("describe_video: uploaded %s as %s (state=%s)",
                    local_path, uploaded.name, uploaded.state)
    except Exception as exc:
        return None, f"File upload failed: {exc}"
    elapsed = 0
    while True:
        # State is compared via substring of its string form so both enum
        # and plain-string SDK representations are handled.
        state_str = str(getattr(uploaded, "state", "")).upper()
        if "ACTIVE" in state_str:
            break
        if "PROCESSING" not in state_str:
            # Neither ACTIVE nor PROCESSING — something went wrong
            try:
                client.files.delete(name=uploaded.name)
            except Exception:
                pass
            return None, f"File processing failed (state={uploaded.state})."
        if elapsed >= GEMINI_UPLOAD_TIMEOUT:
            try:
                client.files.delete(name=uploaded.name)
            except Exception:
                pass
            return None, (
                f"File processing timed out after {GEMINI_UPLOAD_TIMEOUT}s."
            )
        # Still PROCESSING: sleep, then re-fetch the file's current state.
        time.sleep(GEMINI_UPLOAD_POLL_INTERVAL)
        elapsed += GEMINI_UPLOAD_POLL_INTERVAL
        try:
            uploaded = client.files.get(name=uploaded.name)
        except Exception as exc:
            return None, f"Failed to check file status: {exc}"
        logger.info("describe_video: file %s state=%s (elapsed=%ds)",
                    uploaded.name, uploaded.state, elapsed)
    return uploaded, None
# ---------------------------------------------------------------------------
# Shared: Gemini generation with model fallback
# ---------------------------------------------------------------------------
def _is_daily_quota_error(error_str: str) -> bool:
"""Detect daily-quota exhaustion from a genai SDK exception message."""
low = error_str.lower()
return "429" in low and "per day" in low
async def _generate_description(
    client: genai.Client,
    video_part: types.Part,
    prompt: str,
    video_label: str,
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Call Gemini with model fallback and key rotation on daily quota 429s.

    Args:
        client: Starting genai client; may be replaced internally when a
            daily-quota 429 triggers key rotation.
        video_part: Part referencing the video (YouTube URI or uploaded file).
        prompt: Full analysis prompt text.
        video_label: Short label used only in log lines.

    Returns ``(result_text, model_used, error_message)``.
    """
    # Try the default model first, then the fallbacks (deduplicated).
    models_to_try = [DEFAULT_MODEL] + [
        m for m in FALLBACK_MODELS if m != DEFAULT_MODEL
    ]
    last_error: Optional[Exception] = None
    current_client = client
    daily_retries = 0
    max_daily_retries = 3
    for current_model in models_to_try:
        try:
            logger.info("describe_video: trying model=%s for %s",
                        current_model, video_label)
            # generate_content is blocking — run it in a worker thread.
            response = await asyncio.to_thread(
                current_client.models.generate_content,
                model=current_model,
                contents=types.Content(
                    parts=[video_part, types.Part(text=prompt)],
                ),
                config=types.GenerateContentConfig(
                    system_instruction=SYSTEM_INSTRUCTION,
                    media_resolution=types.MediaResolution.MEDIA_RESOLUTION_HIGH,
                    thinking_config=types.ThinkingConfig(thinking_budget=-1),
                    max_output_tokens=65000,
                ),
            )
            if response.usage_metadata:
                logger.info(
                    "describe_video: %s model=%s prompt_tokens=%s "
                    "response_tokens=%s total_tokens=%s",
                    video_label, current_model,
                    response.usage_metadata.prompt_token_count,
                    response.usage_metadata.candidates_token_count,
                    response.usage_metadata.total_token_count,
                )
            return response.text, current_model, None
        except Exception as e:
            error_str = str(e)
            last_error = e
            # Daily-quota 429: burn this key, rotate to a fresh one, retry.
            # NOTE(review): ``continue`` advances the for-loop, so each key
            # rotation also consumes one model from models_to_try — confirm
            # that is intended rather than retrying the same model.
            if _is_daily_quota_error(error_str) and daily_retries < max_daily_retries:
                try:
                    # NOTE(review): reaches into SDK internals (_api_client)
                    # to recover the active key; may break on google-genai
                    # upgrades.
                    old_key = current_client._api_client.api_key
                except AttributeError:
                    old_key = ""
                if old_key:
                    await mark_key_daily_spent(old_key, "generate")
                new_key = next_gemini_flash_key()
                current_client = genai.Client(api_key=new_key)
                daily_retries += 1
                logger.warning(
                    "describe_video: daily quota hit, rotated to new key "
                    "(attempt %d/%d)",
                    daily_retries, max_daily_retries,
                )
                continue
            # Transient errors (overload / rate limit): fall through to the
            # next model unless this was the last one.
            is_retriable = any(
                kw in error_str.lower()
                for kw in (
                    "503", "429", "overloaded",
                    "rate limit", "resource exhausted",
                )
            )
            if is_retriable and current_model != models_to_try[-1]:
                logger.warning(
                    "describe_video: model=%s retriable error: %s — falling back",
                    current_model, error_str[:200],
                )
                continue
            else:
                logger.error("describe_video: model=%s failed: %s",
                             current_model, error_str[:500])
                break
    return None, None, f"Failed to describe video: {last_error}"
# ---------------------------------------------------------------------------
# Tool registration
# ---------------------------------------------------------------------------
# Tool registration metadata.
TOOL_NAME = "describe_video"
TOOL_DESCRIPTION = (
    "Provide an extremely detailed description and analysis of a video "
    "using the Gemini API. Supports YouTube (native ingestion), Rumble, "
    "Twitch VODs, direct MP4 files, and 1000+ other video sites via yt-dlp. "
    "Processing time depends on video length (typically 30-120 seconds). "
    "Works best with videos under 1 hour."
)
# JSON-Schema description of the tool's arguments (matches run()'s
# ``video_url``/``focus_area`` parameters).
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "video_url": {
            "type": "string",
            "description": (
                "Full URL of the video to analyse "
                "(YouTube, Rumble, Twitch, direct MP4, etc.)."
            ),
        },
        "focus_area": {
            "type": "string",
            "description": (
                "Optional aspect to focus on (e.g. 'visual effects', "
                "'speaker arguments', 'music analysis', 'tutorial steps')."
            ),
        },
    },
    "required": ["video_url"],
}
async def _charge_default_key_usage(ctx) -> None:
    """Record one describe_video call against the shared default-key quota.

    No-op when *ctx* lacks a redis client or user id.
    """
    if ctx and getattr(ctx, "redis", None) and getattr(ctx, "user_id", None):
        from tools.manage_api_keys import increment_default_key_usage
        await increment_default_key_usage(
            ctx.user_id, "describe_video", ctx.redis,
        )


async def run(
    video_url: str = "",
    youtube_url: str = "",
    focus_area: Optional[str] = None,
    ctx=None,
) -> str:
    """Describe and analyse a video from any supported platform.

    NOTE: a stray ``[docs]`` Sphinx-viewcode artifact preceded this function
    and referenced an undefined name at import time; it has been removed.

    Args:
        video_url: URL of the video (YouTube, Rumble, Twitch, direct MP4, …).
        youtube_url: Legacy alias; used only when ``video_url`` is empty.
        focus_area: Optional aspect to analyse in extra depth.
        ctx: Optional request context carrying ``redis``/``user_id`` for
            per-user API keys and default-key rate limiting.

    Returns:
        JSON string: either ``{"success": true, ...}`` or ``{"error": ...}``.
    """
    url = video_url or youtube_url
    if not url:
        return json.dumps({
            "error": "Missing required argument: video_url is required.",
        })
    url_type = _classify_url(url)
    video_label = url
    if url_type == "youtube":
        video_id = _extract_video_id(url)
        if not video_id:
            return json.dumps({
                "error": "Could not extract video ID from YouTube URL.",
            })
        video_label = f"yt:{video_id}"
    logger.info("describe_video: url_type=%s label=%s", url_type, video_label)
    # ------------------------------------------------------------------
    # Resolve Gemini API key — prefer user key over default
    # ------------------------------------------------------------------
    user_gemini_key = None
    if ctx and getattr(ctx, "redis", None) and getattr(ctx, "user_id", None):
        try:
            from tools.manage_api_keys import get_user_api_key
            user_gemini_key = await get_user_api_key(
                ctx.user_id, "gemini",
                redis_client=ctx.redis,
                channel_id=getattr(ctx, "channel_id", None),
                config=getattr(ctx, "config", None),
            )
        except Exception:
            # Best-effort lookup; fall back to the default key pool.
            pass
    if user_gemini_key:
        client = genai.Client(api_key=user_gemini_key)
        logger.info("describe_video: using user-provided Gemini API key")
        _using_default_key = False
    else:
        # Default pool keys are rate-limited per user per day.
        if ctx and getattr(ctx, "redis", None) and getattr(ctx, "user_id", None):
            from tools.manage_api_keys import (
                check_default_key_limit,
                default_key_limit_error,
            )
            allowed, current, limit = await check_default_key_limit(
                ctx.user_id, "describe_video", ctx.redis, daily_limit=20,
            )
            if not allowed:
                return json.dumps({
                    "error": default_key_limit_error(
                        "describe_video", current, limit,
                    ),
                })
        client = genai.Client(api_key=next_gemini_flash_key())
        _using_default_key = True
    # ------------------------------------------------------------------
    # Fetch metadata via yt-dlp (best-effort — tool works without it)
    # ------------------------------------------------------------------
    metadata = await asyncio.to_thread(_get_video_metadata, url)
    if metadata and metadata.get("duration"):
        duration = metadata["duration"]
        if duration > MAX_DURATION_SECONDS:
            return json.dumps({
                "error": (
                    f"Video is too long ({duration // 60} minutes). "
                    f"Maximum allowed duration is "
                    f"{MAX_DURATION_SECONDS // 60} minutes."
                ),
            })
        fps = _calculate_fps(duration)
        logger.info("describe_video: %s title=%r duration=%ds fps=%.1f",
                    video_label, metadata.get("title", "?"), duration, fps)
    else:
        fps = 1.0
        logger.info("describe_video: no metadata, default fps=%.1f", fps)
    # ------------------------------------------------------------------
    # Build prompt with metadata + analysis template
    # ------------------------------------------------------------------
    source_name = "YouTube" if url_type == "youtube" else (
        (metadata.get("extractor") or url_type).replace("_", " ").title()
        if metadata else url_type.title()
    )
    prompt_parts: list[str] = []
    if metadata:
        metadata_str = _format_metadata_for_prompt(metadata, source=source_name)
        prompt_parts.append(
            f"## VIDEO METADATA (from {source_name})\n\n"
            f"{metadata_str}\n\n---\n"
        )
    prompt_parts.append(ANALYSIS_PROMPT)
    if focus_area:
        prompt_parts.append(f"""
## SPECIFIC FOCUS AREA
The user has requested special attention to the following:
\"\"\"{focus_area}\"\"\"
While still providing comprehensive analysis, give EXTRA DEPTH AND DETAIL to this specific area. If timestamps are mentioned, pay particular attention to those sections. If topics are mentioned, explore them more thoroughly than other aspects.""")
    prompt = "\n".join(prompt_parts)
    # ------------------------------------------------------------------
    # YouTube: native Gemini ingestion (fast path — no download)
    # ------------------------------------------------------------------
    if url_type == "youtube":
        video_part = types.Part(
            file_data=types.FileData(file_uri=url),
            video_metadata=types.VideoMetadata(fps=fps),
        )
        result_text, model_used, err = await _generate_description(
            client, video_part, prompt, video_label,
        )
        if err:
            return json.dumps({"error": err})
        # BUG FIX: response.text can be None with no error reported;
        # the original crashed on len(None) here.
        if not result_text:
            return json.dumps({"error": "Model returned an empty response."})
        logger.info("describe_video: %s done (%d chars)", video_label, len(result_text))
        result = {
            "success": True,
            "video_id": _extract_video_id(url),
            "video_url": url,
            "source": "youtube",
            "model_used": model_used,
            "description": result_text,
        }
        if metadata:
            result["title"] = metadata.get("title")
            result["channel"] = metadata.get("channel")
            result["duration_seconds"] = metadata.get("duration")
        if _using_default_key:
            await _charge_default_key_usage(ctx)
        return json.dumps(result)
    # ------------------------------------------------------------------
    # Non-YouTube: download → upload → describe → cleanup
    # ------------------------------------------------------------------
    temp_dir = tempfile.mkdtemp(prefix="describe_video_")
    gemini_file = None
    try:
        # --- Download ------------------------------------------------
        if url_type == "direct":
            local_path, dl_err = await _download_direct(url, temp_dir)
            if dl_err:
                # Direct download failed — try yt-dlp as fallback
                local_path, dl_err = await _download_with_ytdlp(url, temp_dir)
        else:
            local_path, dl_err = await _download_with_ytdlp(url, temp_dir)
            if dl_err and _is_direct_video_url(url):
                local_path, dl_err = await _download_direct(url, temp_dir)
        if dl_err or not local_path:
            return json.dumps({
                "error": f"Failed to download video: {dl_err}",
            })
        file_size = os.path.getsize(local_path)
        logger.info("describe_video: downloaded %s (%.1f MB)",
                    local_path, file_size / 1024 / 1024)
        # --- Upload to Gemini File API -------------------------------
        gemini_file, up_err = await asyncio.to_thread(
            _upload_to_gemini_sync, client, local_path,
        )
        if up_err or not gemini_file:
            return json.dumps({
                "error": f"Failed to upload video to Gemini: {up_err}",
            })
        # --- Generate description ------------------------------------
        video_part = types.Part(
            file_data=types.FileData(file_uri=gemini_file.uri),
            video_metadata=types.VideoMetadata(fps=fps),
        )
        result_text, model_used, gen_err = await _generate_description(
            client, video_part, prompt, video_label,
        )
        if gen_err:
            return json.dumps({"error": gen_err})
        # BUG FIX: guard against a None/empty response (see YouTube path).
        if not result_text:
            return json.dumps({"error": "Model returned an empty response."})
        logger.info("describe_video: %s done (%d chars)", video_label, len(result_text))
        result = {
            "success": True,
            "video_url": url,
            "source": url_type,
            "model_used": model_used,
            "description": result_text,
        }
        if metadata:
            result["title"] = metadata.get("title")
            result["channel"] = metadata.get("channel")
            result["duration_seconds"] = metadata.get("duration")
            if metadata.get("extractor"):
                result["platform"] = metadata["extractor"]
        if _using_default_key:
            await _charge_default_key_usage(ctx)
        return json.dumps(result)
    finally:
        # Cleanup local temp files
        shutil.rmtree(temp_dir, ignore_errors=True)
        # Cleanup Gemini-hosted file
        if gemini_file:
            try:
                await asyncio.to_thread(
                    client.files.delete, name=gemini_file.name,
                )
                logger.info("describe_video: deleted Gemini file %s",
                            gemini_file.name)
            except Exception:
                logger.warning("describe_video: failed to delete Gemini file %s",
                               getattr(gemini_file, "name", "?"))