# Source code for tools.youtube_describe

"""Analyse a video via the Gemini API — YouTube (native ingestion), Rumble,
Twitch, direct MP4, and 1000+ other sites via yt-dlp."""

import asyncio
import json
import logging
import mimetypes
import os
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
from urllib.parse import unquote, urlparse

import aiofiles
import aiohttp

from google import genai
from google.genai import types

from gemini_embed_pool import mark_key_daily_spent, next_gemini_flash_key
from url_utils import YTDLP_METADATA_NETWORK_ARGS, parse_ytdlp_dump_json_stdout

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Refuse videos longer than one hour (keeps token usage and latency bounded).
MAX_DURATION_SECONDS = 3600

# Primary model, then fallbacks tried in order on retriable errors (503/429).
DEFAULT_MODEL = "gemini-3.1-flash-lite-preview"
FALLBACK_MODELS = ["gemini-3-flash-preview", "gemini-3.1-pro-preview"]

MAX_DOWNLOAD_SIZE = 200 * 1024 * 1024  # 200 MB
DOWNLOAD_TIMEOUT = 300  # seconds, for both HTTP and yt-dlp downloads
YTDLP_COOKIES = "/root/cookies.txt"  # cookie jar handed to yt-dlp
# Cap quality at 720p to keep downloads small; degrade gracefully to "best".
YTDLP_FORMAT_SELECTOR = (
    "bestvideo[height<=720]+bestaudio/best[height<=720]/best"
)
GEMINI_UPLOAD_POLL_INTERVAL = 10  # seconds
GEMINI_UPLOAD_TIMEOUT = 300  # 5 minutes

# URL path extensions treated as directly-downloadable video files.
VIDEO_EXTENSIONS = frozenset({
    ".mp4", ".webm", ".mkv", ".mov", ".avi", ".flv",
    ".wmv", ".m4v", ".ts", ".mpeg", ".mpg", ".3gp",
})

# System instruction: sets the analyst persona and the "length is not a
# constraint" policy applied to every describe_video request.
SYSTEM_INSTRUCTION = """You are an expert video analyst, content researcher, and transcriptionist. You provide both an executive summary for quick understanding AND exhaustive, highly detailed descriptions that capture everything happening in a video, along with deeper insights that most viewers would miss.

You watch videos with the eye of a film critic, the curiosity of a researcher, and the attention to detail of an investigative journalist. You notice subtle details: background elements, editing choices, body language, tone shifts, implied meanings, cultural references, and connections to broader topics.

Your output should serve two purposes: (1) let someone quickly grasp the essence of a video, and (2) provide enough depth that they could understand not just WHAT happened, but HOW it was presented, WHY certain choices were made, and WHAT deeper meanings or implications exist.

When dialogue or narration is present, you transcribe it VERBATIM whenever possible. You capture not just the gist, but the exact words spoken. Your transcripts preserve filler words, false starts, and natural speech patterns to give an authentic record of what was said.

LENGTH IS NOT A CONSTRAINT. You have a 65,000 token output budget. Use as much space as needed to be thorough. Never truncate, abbreviate, or skip details to save space. More detail is always better. Err on the side of being too comprehensive rather than too brief.

You dig deep. You connect dots. You provide value beyond what's obvious."""

# Analysis template appended after the (optional) metadata block in the user
# prompt; defines the required sections of the generated report.
ANALYSIS_PROMPT = """Provide an exhaustive, deeply detailed analysis of this video.

## EXECUTIVE SUMMARY

Start with a summary (5-8 sentences) that captures:
- What this video is about and its main topic/purpose
- The key takeaway or central message
- Who made it and who it's for
- Why it matters or what makes it notable

---

## FULL VIDEO DESCRIPTION

Walk through the ENTIRE video chronologically with granular detail:
- Describe every segment, scene, and transition
- Note exactly what is shown visually at each moment (settings, objects, people, text overlays, graphics)
- Capture what is said, including notable phrasing, tone, and delivery
- Include timestamps (MM:SS) throughout to anchor your description
- Don't skip anything—even "minor" moments often contain important context

## VISUAL & PRODUCTION ANALYSIS

- Camera work: shots, angles, movements, framing choices
- Editing style: pacing, cuts, transitions, rhythm
- Graphics, animations, text overlays, and their timing
- Color grading, lighting, visual mood
- B-roll footage and how it's used
- Thumbnail and title analysis (if visible/relevant)

## TRANSCRIPT / DIALOGUE

Provide a full verbatim transcript of all spoken content in the video, organized chronologically with timestamps:
- Transcribe ALL dialogue, narration, and spoken content word-for-word
- Include speaker identification where multiple speakers are present
- Note filler words, false starts, laughter, and other vocal elements in [brackets]
- Use timestamps (MM:SS) to anchor each segment of speech
- If the video is very long, prioritize completeness over brevity — capture everything said
- For non-English content, provide the original language plus an English translation

## AUDIO & PRODUCTION ANALYSIS

- Speaking style, tone, energy, and how it shifts throughout
- Background music/sound design and its emotional effect
- Pauses, emphasis, and rhetorical techniques
- Sound mixing choices and audio quality

## DEEPER INSIGHTS & NON-OBVIOUS OBSERVATIONS

This is crucial—provide analysis that goes beyond what's surface-level:
- What is the creator's underlying message or agenda (stated or unstated)?
- What persuasion techniques or narrative structures are being used?
- What assumptions does the video make about its audience?
- What context (cultural, historical, industry-specific) helps understand this content?
- What biases or perspectives are present?
- What questions does this video raise but not answer?
- How does this connect to broader trends, debates, or topics?
- What might a casual viewer miss that's actually significant?

## CONTENT STRUCTURE & STRATEGY

- How is the video structured? What's the narrative arc?
- How does it hook viewers and maintain attention?
- What calls-to-action exist (explicit or implicit)?
- How does it compare to typical content in this genre/niche?

## CREATOR & CONTEXT

- Who made this and what's their background/credibility?
- What's the apparent purpose (educate, entertain, persuade, sell)?
- Who is the target audience and how can you tell?

Be extremely thorough. Length is not a concern—you have a large output budget, so use it. Your analysis should be comprehensive enough that someone could understand this video in rich detail without ever watching it. Include specific examples, timestamps, and direct observations rather than vague generalizations. Do not truncate or abbreviate any section."""


# ---------------------------------------------------------------------------
# URL helpers
# ---------------------------------------------------------------------------

def _is_youtube_url(url: str) -> bool:
    if not url:
        return False
    url_lower = url.lower().strip()
    return (
        "youtube.com/watch" in url_lower
        or "youtu.be/" in url_lower
        or "youtube.com/shorts" in url_lower
    )


def _extract_video_id(url: str) -> Optional[str]:
    patterns = [
        r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})",
        r"(?:youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
        r"(?:youtube\.com/v/)([a-zA-Z0-9_-]{11})",
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None


def _is_direct_video_url(url: str) -> bool:
    """True when the URL path ends in a recognised video file extension."""
    _, extension = os.path.splitext(urlparse(url).path)
    return extension.lower() in VIDEO_EXTENSIONS


def _classify_url(url: str) -> str:
    """Bucket *url* as ``'youtube'``, ``'direct'``, or generic ``'ytdlp'``."""
    if _is_youtube_url(url):
        return "youtube"
    return "direct" if _is_direct_video_url(url) else "ytdlp"


# ---------------------------------------------------------------------------
# Metadata & FPS (shared — yt-dlp supports most sites)
# ---------------------------------------------------------------------------

def _calculate_fps(duration_seconds: int) -> float:
    """Calculate appropriate FPS based on video duration.

    Shorter videos get higher FPS for more detail.
    Longer videos get lower FPS to manage token usage.
    """
    if duration_seconds <= 60:
        return 4
    elif duration_seconds <= 300:
        return 2
    elif duration_seconds <= 900:
        return 1.5
    elif duration_seconds <= 1800:
        return 1
    else:
        return 0.5


def _get_video_metadata(url: str) -> Optional[dict]:
    """Fetch video metadata with yt-dlp (YouTube, Rumble, Twitch, ...).

    Best-effort helper: returns a normalised metadata dict, or None on any
    failure (non-zero exit, parse error, timeout, unexpected exception).
    """
    cmd = [
        "yt-dlp",
        "--cookies", YTDLP_COOKIES,
        "--dump-json",
        "--skip-download",
        "--no-warnings",
        "--no-playlist",
        *YTDLP_METADATA_NETWORK_ARGS,
        url,
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=48)
        if proc.returncode != 0:
            logger.warning("yt-dlp metadata failed (rc=%d): %s",
                           proc.returncode, proc.stderr[:200])
            return None

        info = parse_ytdlp_dump_json_stdout(proc.stdout)
        if not info:
            logger.warning("yt-dlp metadata parse failed for %s", url)
            return None

        raw_date = info.get("upload_date", "")
        meta = {
            "title": info.get("title", "Unknown"),
            "channel": info.get("channel", info.get("uploader", "Unknown")),
            "channel_id": info.get("channel_id", ""),
            "duration": int(info.get("duration", 0)),
            "upload_date": raw_date,
            "view_count": info.get("view_count", 0),
            "like_count": info.get("like_count", 0),
            "description": info.get("description", ""),
            "tags": info.get("tags", []),
            "categories": info.get("categories", []),
            "extractor": info.get("extractor", ""),
        }

        # yt-dlp dates come back as YYYYMMDD; expose an ISO-style variant too.
        if raw_date and len(raw_date) == 8:
            meta["upload_date_formatted"] = (
                f"{raw_date[:4]}-{raw_date[4:6]}-{raw_date[6:8]}"
            )
        else:
            meta["upload_date_formatted"] = raw_date

        return meta

    except subprocess.TimeoutExpired as exc:
        partial = exc.stderr or ""
        if isinstance(partial, bytes):
            partial = partial.decode("utf-8", errors="replace")
        tail = partial[-500:]
        logger.warning(
            "yt-dlp metadata timed out for %s (partial stderr: %r)",
            url,
            tail.replace("\n", " ")[:350] if tail else "<empty>",
        )
        return None
    except Exception as e:
        logger.warning("yt-dlp metadata fetch failed: %s", e)
        return None


def _format_metadata_for_prompt(metadata: dict, source: str = "video platform") -> str:
    """Format video metadata as a string for inclusion in the prompt."""
    duration = metadata["duration"]
    duration_str = f"{duration // 60}:{duration % 60:02d}"

    lines = [
        f"Title: {metadata['title']}",
        f"Channel: {metadata['channel']}",
        f"Duration: {duration_str}",
    ]

    if metadata.get("upload_date_formatted"):
        lines.append(f"Upload Date: {metadata['upload_date_formatted']}")
    if metadata.get("view_count"):
        lines.append(f"Views: {metadata['view_count']:,}")
    if metadata.get("like_count"):
        lines.append(f"Likes: {metadata['like_count']:,}")
    if metadata.get("tags"):
        lines.append(f"Tags: {', '.join(metadata['tags'][:15])}")
    if metadata.get("categories"):
        lines.append(f"Categories: {', '.join(metadata['categories'])}")
    if metadata.get("description"):
        lines.append(f"\nVideo Description:\n{metadata['description']}")

    return "\n".join(lines)


# ---------------------------------------------------------------------------
# Non-YouTube: download helpers
# ---------------------------------------------------------------------------

async def _download_with_ytdlp(
    url: str, temp_dir: str,
) -> tuple[Optional[str], Optional[str]]:
    """Fetch a video into *temp_dir* using yt-dlp.

    Returns ``(local_path, error)``; exactly one of the pair is None.
    """
    if not await asyncio.to_thread(shutil.which, "yt-dlp"):
        return None, "yt-dlp is not installed."

    output_template = os.path.join(temp_dir, "%(title).100s.%(ext)s")
    argv = [
        "yt-dlp",
        "--cookies", YTDLP_COOKIES,
        "-f", YTDLP_FORMAT_SELECTOR,
        "-o", output_template,
        "--no-playlist",
        "--no-overwrites",
        "--restrict-filenames",
        "--max-filesize", str(MAX_DOWNLOAD_SIZE),
        # Ask yt-dlp to print the final path so we don't have to guess it.
        "--print", "after_move:filepath",
        url,
    ]

    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        raw_out, raw_err = await asyncio.wait_for(
            proc.communicate(), timeout=DOWNLOAD_TIMEOUT,
        )
        out_text = raw_out.decode("utf-8", errors="replace").strip()
        err_text = raw_err.decode("utf-8", errors="replace").strip()

        if proc.returncode != 0:
            return None, err_text or out_text or f"yt-dlp exit {proc.returncode}"

        if out_text:
            # The last printed line is the after-move file path.
            candidate = Path(out_text.split("\n")[-1].strip())
            if candidate.exists():
                size = candidate.stat().st_size
                if size > MAX_DOWNLOAD_SIZE:
                    candidate.unlink(missing_ok=True)
                    return None, (
                        f"Downloaded file too large "
                        f"({size / 1024 / 1024:.0f} MB > "
                        f"{MAX_DOWNLOAD_SIZE // 1024 // 1024} MB limit)."
                    )
                return str(candidate), None

        # Fallback: pick up whatever media file yt-dlp left in temp_dir.
        media_exts = {".mp4", ".webm", ".mkv", ".m4a", ".mp3", ".flv", ".avi"}
        for entry in Path(temp_dir).iterdir():
            if entry.is_file() and entry.suffix.lower() in media_exts:
                return str(entry), None

        return None, "Download completed but output file not found."

    except asyncio.TimeoutError:
        return None, f"Download timed out after {DOWNLOAD_TIMEOUT}s."
    except Exception as exc:
        return None, f"yt-dlp download error: {exc}"


async def _download_direct(
    url: str, temp_dir: str,
) -> tuple[Optional[str], Optional[str]]:
    """Stream a direct video file over HTTP into *temp_dir*.

    Returns ``(local_path, error)``; exactly one of the pair is None.
    """
    try:
        # Derive a filesystem-safe local filename from the URL path.
        raw_name = os.path.basename(unquote(urlparse(url).path)) or "video.mp4"
        safe_name = re.sub(r'[<>:"/\\|?*]', "_", raw_name)
        dest = os.path.join(temp_dir, safe_name)

        session_timeout = aiohttp.ClientTimeout(total=DOWNLOAD_TIMEOUT)
        async with aiohttp.ClientSession(timeout=session_timeout) as session:
            async with session.get(url) as resp:
                if resp.status != 200:
                    return None, f"HTTP {resp.status}: {resp.reason}"

                # Reject early if the server declares an oversized payload.
                declared = resp.headers.get("Content-Length")
                if declared and int(declared) > MAX_DOWNLOAD_SIZE:
                    return None, (
                        f"File too large "
                        f"({int(declared) / 1024 / 1024:.0f} MB > "
                        f"{MAX_DOWNLOAD_SIZE // 1024 // 1024} MB limit)."
                    )

                # Stream to disk, enforcing the cap even without a
                # Content-Length header.
                received = 0
                async with aiofiles.open(dest, "wb") as out_file:
                    async for chunk in resp.content.iter_chunked(65536):
                        received += len(chunk)
                        if received > MAX_DOWNLOAD_SIZE:
                            return None, (
                                f"File exceeded "
                                f"{MAX_DOWNLOAD_SIZE // 1024 // 1024} MB "
                                f"during download."
                            )
                        await out_file.write(chunk)

        return dest, None

    except asyncio.TimeoutError:
        return None, f"Download timed out after {DOWNLOAD_TIMEOUT}s."
    except Exception as exc:
        return None, f"HTTP download error: {exc}"


# ---------------------------------------------------------------------------
# Non-YouTube: Gemini File API upload
# ---------------------------------------------------------------------------

def _upload_to_gemini_sync(
    client: genai.Client, local_path: str,
) -> tuple[Optional[object], Optional[str]]:
    """Upload a local video to the Gemini File API and poll until ACTIVE.

    Blocking — call via ``asyncio.to_thread``.

    Args:
        client: Authenticated genai client used for upload/poll/delete.
        local_path: Path of the downloaded video file.

    Returns:
        ``(gemini_file, error)`` — on success the uploaded file object and
        None; on failure None and an error message (the remote file is
        deleted best-effort on processing failure or timeout).
    """
    import time

    # Fall back to video/mp4 when the extension is unknown to mimetypes.
    mime_type = mimetypes.guess_type(local_path)[0] or "video/mp4"

    try:
        uploaded = client.files.upload(
            file=local_path,
            config={"mime_type": mime_type},
        )
        logger.info("describe_video: uploaded %s as %s (state=%s)",
                     local_path, uploaded.name, uploaded.state)
    except Exception as exc:
        return None, f"File upload failed: {exc}"

    # Poll until the file leaves PROCESSING.  State comparison is string
    # based (str(...).upper()) so it tolerates enum or plain-string states.
    elapsed = 0
    while True:
        state_str = str(getattr(uploaded, "state", "")).upper()
        if "ACTIVE" in state_str:
            break
        if "PROCESSING" not in state_str:
            # Neither ACTIVE nor PROCESSING — something went wrong
            try:
                client.files.delete(name=uploaded.name)
            except Exception:
                pass
            return None, f"File processing failed (state={uploaded.state})."
        if elapsed >= GEMINI_UPLOAD_TIMEOUT:
            # Give up and remove the stuck remote file (best-effort).
            try:
                client.files.delete(name=uploaded.name)
            except Exception:
                pass
            return None, (
                f"File processing timed out after {GEMINI_UPLOAD_TIMEOUT}s."
            )
        time.sleep(GEMINI_UPLOAD_POLL_INTERVAL)
        elapsed += GEMINI_UPLOAD_POLL_INTERVAL
        try:
            uploaded = client.files.get(name=uploaded.name)
        except Exception as exc:
            return None, f"Failed to check file status: {exc}"
        logger.info("describe_video: file %s state=%s (elapsed=%ds)",
                     uploaded.name, uploaded.state, elapsed)

    return uploaded, None


# ---------------------------------------------------------------------------
# Shared: Gemini generation with model fallback
# ---------------------------------------------------------------------------

def _is_daily_quota_error(error_str: str) -> bool:
    """Detect daily-quota exhaustion from a genai SDK exception message."""
    low = error_str.lower()
    return "429" in low and "per day" in low


async def _generate_description(
    client: genai.Client,
    video_part: types.Part,
    prompt: str,
    video_label: str,
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Call Gemini with model fallback and key rotation on daily quota 429s.

    Tries ``DEFAULT_MODEL`` first, then each ``FALLBACK_MODELS`` entry on
    retriable errors (503/429/overloaded/...).  On a *daily*-quota 429 the
    API key is rotated and the SAME model is retried (up to 3 rotations)
    before falling back — daily quotas are per-key, not per-model.
    (Previously the ``continue`` sat inside a ``for`` loop over models, so a
    key rotation silently skipped to the next model instead of retrying.)

    Returns:
        ``(result_text, model_used, error_message)`` — on success the first
        two are set and the error is None; on failure only the error is set.
    """
    models_to_try = [DEFAULT_MODEL] + [
        m for m in FALLBACK_MODELS if m != DEFAULT_MODEL
    ]

    last_error: Optional[Exception] = None
    current_client = client
    daily_retries = 0
    max_daily_retries = 3

    model_idx = 0
    while model_idx < len(models_to_try):
        current_model = models_to_try[model_idx]
        try:
            logger.info("describe_video: trying model=%s for %s",
                        current_model, video_label)

            response = await asyncio.to_thread(
                current_client.models.generate_content,
                model=current_model,
                contents=types.Content(
                    parts=[video_part, types.Part(text=prompt)],
                ),
                config=types.GenerateContentConfig(
                    system_instruction=SYSTEM_INSTRUCTION,
                    media_resolution=types.MediaResolution.MEDIA_RESOLUTION_HIGH,
                    thinking_config=types.ThinkingConfig(thinking_budget=-1),
                    max_output_tokens=65000,
                ),
            )

            if response.usage_metadata:
                logger.info(
                    "describe_video: %s model=%s prompt_tokens=%s "
                    "response_tokens=%s total_tokens=%s",
                    video_label, current_model,
                    response.usage_metadata.prompt_token_count,
                    response.usage_metadata.candidates_token_count,
                    response.usage_metadata.total_token_count,
                )

            return response.text, current_model, None

        except Exception as e:
            error_str = str(e)
            last_error = e

            if _is_daily_quota_error(error_str) and daily_retries < max_daily_retries:
                # Mark the exhausted key, swap in a fresh one, and retry the
                # SAME model (model_idx unchanged).
                try:
                    old_key = current_client._api_client.api_key
                except AttributeError:
                    old_key = ""
                if old_key:
                    await mark_key_daily_spent(old_key, "generate")
                current_client = genai.Client(api_key=next_gemini_flash_key())
                daily_retries += 1
                logger.warning(
                    "describe_video: daily quota hit, rotated to new key "
                    "(attempt %d/%d)",
                    daily_retries, max_daily_retries,
                )
                continue

            is_retriable = any(
                kw in error_str.lower()
                for kw in (
                    "503", "429", "overloaded",
                    "rate limit", "resource exhausted",
                )
            )
            if is_retriable and model_idx < len(models_to_try) - 1:
                logger.warning(
                    "describe_video: model=%s retriable error: %s — falling back",
                    current_model, error_str[:200],
                )
                model_idx += 1
                continue

            logger.error("describe_video: model=%s failed: %s",
                         current_model, error_str[:500])
            break

    return None, None, f"Failed to describe video: {last_error}"


# ---------------------------------------------------------------------------
# Tool registration
# ---------------------------------------------------------------------------

# Registration metadata consumed by the tool dispatcher.
TOOL_NAME = "describe_video"
# Natural-language description surfaced to the LLM tool-picker.
TOOL_DESCRIPTION = (
    "Provide an extremely detailed description and analysis of a video "
    "using the Gemini API. Supports YouTube (native ingestion), Rumble, "
    "Twitch VODs, direct MP4 files, and 1000+ other video sites via yt-dlp. "
    "Processing time depends on video length (typically 30-120 seconds). "
    "Works best with videos under 1 hour."
)
# JSON-schema of tool arguments; only video_url is required.
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "video_url": {
            "type": "string",
            "description": (
                "Full URL of the video to analyse "
                "(YouTube, Rumble, Twitch, direct MP4, etc.)."
            ),
        },
        "focus_area": {
            "type": "string",
            "description": (
                "Optional aspect to focus on (e.g. 'visual effects', "
                "'speaker arguments', 'music analysis', 'tutorial steps')."
            ),
        },
    },
    "required": ["video_url"],
}


async def run(
    video_url: str = "",
    youtube_url: str = "",
    focus_area: Optional[str] = None,
    ctx=None,
) -> str:
    """Describe and analyse a video from any supported platform.

    Args:
        video_url: URL of the video (YouTube, Rumble, Twitch, direct MP4...).
        youtube_url: Legacy alias, used when ``video_url`` is empty.
        focus_area: Optional aspect to emphasise in the analysis.
        ctx: Optional request context; when it exposes ``redis`` and
            ``user_id``, per-user API keys and default-key rate limits apply.

    Returns:
        JSON string with either a ``description`` payload or an ``error``.
    """
    url = video_url or youtube_url
    if not url:
        return json.dumps({
            "error": "Missing required argument: video_url is required.",
        })

    url_type = _classify_url(url)
    video_label = url
    if url_type == "youtube":
        video_id = _extract_video_id(url)
        if not video_id:
            return json.dumps({
                "error": "Could not extract video ID from YouTube URL.",
            })
        video_label = f"yt:{video_id}"

    logger.info("describe_video: url_type=%s label=%s", url_type, video_label)

    # ------------------------------------------------------------------
    # Resolve Gemini API key — prefer user key over default
    # ------------------------------------------------------------------
    user_gemini_key = None
    if ctx and getattr(ctx, "redis", None) and getattr(ctx, "user_id", None):
        try:
            from tools.manage_api_keys import get_user_api_key
            user_gemini_key = await get_user_api_key(
                ctx.user_id,
                "gemini",
                redis_client=ctx.redis,
                channel_id=getattr(ctx, "channel_id", None),
                config=getattr(ctx, "config", None),
            )
        except Exception:
            pass  # best-effort: fall back to the default key pool

    if user_gemini_key:
        client = genai.Client(api_key=user_gemini_key)
        logger.info("describe_video: using user-provided Gemini API key")
        _using_default_key = False
    else:
        # Default key pool is rate-limited per user per day.
        if ctx and getattr(ctx, "redis", None) and getattr(ctx, "user_id", None):
            from tools.manage_api_keys import (
                check_default_key_limit,
                default_key_limit_error,
            )
            allowed, current, limit = await check_default_key_limit(
                ctx.user_id, "describe_video", ctx.redis, daily_limit=20,
            )
            if not allowed:
                return json.dumps({
                    "error": default_key_limit_error(
                        "describe_video", current, limit,
                    ),
                })
        client = genai.Client(api_key=next_gemini_flash_key())
        _using_default_key = True

    # ------------------------------------------------------------------
    # Fetch metadata via yt-dlp (best-effort — tool works without it)
    # ------------------------------------------------------------------
    metadata = await asyncio.to_thread(_get_video_metadata, url)
    if metadata and metadata.get("duration"):
        duration = metadata["duration"]
        if duration > MAX_DURATION_SECONDS:
            return json.dumps({
                "error": (
                    f"Video is too long ({duration // 60} minutes). "
                    f"Maximum allowed duration is "
                    f"{MAX_DURATION_SECONDS // 60} minutes."
                ),
            })
        fps = _calculate_fps(duration)
        logger.info("describe_video: %s title=%r duration=%ds fps=%.1f",
                    video_label, metadata.get("title", "?"), duration, fps)
    else:
        fps = 1.0
        logger.info("describe_video: no metadata, default fps=%.1f", fps)

    # ------------------------------------------------------------------
    # Build prompt with metadata + analysis template
    # ------------------------------------------------------------------
    source_name = "YouTube" if url_type == "youtube" else (
        (metadata.get("extractor") or url_type).replace("_", " ").title()
        if metadata else url_type.title()
    )
    prompt_parts: list[str] = []
    if metadata:
        metadata_str = _format_metadata_for_prompt(metadata, source=source_name)
        prompt_parts.append(
            f"## VIDEO METADATA (from {source_name})\n\n"
            f"{metadata_str}\n\n---\n"
        )
    prompt_parts.append(ANALYSIS_PROMPT)
    if focus_area:
        prompt_parts.append(f"""
## SPECIFIC FOCUS AREA

The user has requested special attention to the following:
\"\"\"{focus_area}\"\"\"

While still providing comprehensive analysis, give EXTRA DEPTH AND DETAIL to this specific area. If timestamps are mentioned, pay particular attention to those sections.
If topics are mentioned, explore them more thoroughly than other aspects.""")
    prompt = "\n".join(prompt_parts)

    # ------------------------------------------------------------------
    # YouTube: native Gemini ingestion (fast path — no download)
    # ------------------------------------------------------------------
    if url_type == "youtube":
        video_part = types.Part(
            file_data=types.FileData(file_uri=url),
            video_metadata=types.VideoMetadata(fps=fps),
        )
        result_text, model_used, err = await _generate_description(
            client, video_part, prompt, video_label,
        )
        if err:
            return json.dumps({"error": err})

        # result_text may be None when the model returned no text part.
        logger.info("describe_video: %s done (%d chars)",
                    video_label, len(result_text or ""))
        result = {
            "success": True,
            "video_id": _extract_video_id(url),
            "video_url": url,
            "source": "youtube",
            "model_used": model_used,
            "description": result_text,
        }
        if metadata:
            result["title"] = metadata.get("title")
            result["channel"] = metadata.get("channel")
            result["duration_seconds"] = metadata.get("duration")

        if _using_default_key and ctx and getattr(ctx, "redis", None) \
                and getattr(ctx, "user_id", None):
            from tools.manage_api_keys import increment_default_key_usage
            await increment_default_key_usage(
                ctx.user_id, "describe_video", ctx.redis,
            )
        return json.dumps(result)

    # ------------------------------------------------------------------
    # Non-YouTube: download → upload → describe → cleanup
    # ------------------------------------------------------------------
    temp_dir = tempfile.mkdtemp(prefix="describe_video_")
    gemini_file = None
    try:
        # --- Download ------------------------------------------------
        if url_type == "direct":
            local_path, dl_err = await _download_direct(url, temp_dir)
            if dl_err:
                # Direct download failed — try yt-dlp as fallback
                local_path, dl_err = await _download_with_ytdlp(url, temp_dir)
        else:
            local_path, dl_err = await _download_with_ytdlp(url, temp_dir)
            if dl_err and _is_direct_video_url(url):
                local_path, dl_err = await _download_direct(url, temp_dir)

        if dl_err or not local_path:
            return json.dumps({
                "error": f"Failed to download video: {dl_err}",
            })

        file_size = os.path.getsize(local_path)
        logger.info("describe_video: downloaded %s (%.1f MB)",
                    local_path, file_size / 1024 / 1024)

        # --- Upload to Gemini File API -------------------------------
        gemini_file, up_err = await asyncio.to_thread(
            _upload_to_gemini_sync, client, local_path,
        )
        if up_err or not gemini_file:
            return json.dumps({
                "error": f"Failed to upload video to Gemini: {up_err}",
            })

        # --- Generate description ------------------------------------
        video_part = types.Part(
            file_data=types.FileData(file_uri=gemini_file.uri),
            video_metadata=types.VideoMetadata(fps=fps),
        )
        result_text, model_used, gen_err = await _generate_description(
            client, video_part, prompt, video_label,
        )
        if gen_err:
            return json.dumps({"error": gen_err})

        logger.info("describe_video: %s done (%d chars)",
                    video_label, len(result_text or ""))
        result = {
            "success": True,
            "video_url": url,
            "source": url_type,
            "model_used": model_used,
            "description": result_text,
        }
        if metadata:
            result["title"] = metadata.get("title")
            result["channel"] = metadata.get("channel")
            result["duration_seconds"] = metadata.get("duration")
            if metadata.get("extractor"):
                result["platform"] = metadata["extractor"]

        if _using_default_key and ctx and getattr(ctx, "redis", None) \
                and getattr(ctx, "user_id", None):
            from tools.manage_api_keys import increment_default_key_usage
            await increment_default_key_usage(
                ctx.user_id, "describe_video", ctx.redis,
            )
        return json.dumps(result)

    finally:
        # Cleanup local temp files
        shutil.rmtree(temp_dir, ignore_errors=True)
        # Cleanup Gemini-hosted file (best-effort)
        if gemini_file:
            try:
                await asyncio.to_thread(
                    client.files.delete, name=gemini_file.name,
                )
                logger.info("describe_video: deleted Gemini file %s",
                            gemini_file.name)
            except Exception:
                logger.warning("describe_video: failed to delete Gemini file %s",
                               getattr(gemini_file, "name", "?"))