Source code for url_utils.image

"""Direct image URL detection and download."""

from __future__ import annotations

import asyncio
import logging
import re
from typing import Any, Dict, Optional

import aiohttp

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# File extensions regarded as images.
# NOTE(review): not referenced anywhere in the visible part of this module;
# the regex below carries its own copy of the extension list -- confirm
# whether this set is still needed or should drive the pattern.
_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}

# Upper bound, in bytes, on the size of an image that will be downloaded.
_MAX_IMAGE_DOWNLOAD = 20 * 1024 * 1024  # 20 MB

# Matches an http(s) URL that looks like a direct image link.  Group 1 is the
# scheme, group 2 is the rest of the URL.  Matching is case-insensitive.
_IMAGE_URL_PATTERN = re.compile(
    r"(https?://)"
    r"("
    # Discord CDN attachments: anything non-whitespace after /attachments/.
    r"cdn\.discordapp\.com/attachments/[^\s]+"
    # Discord media proxy attachments: same shape on the media host.
    r"|media\.discordapp\.net/attachments/[^\s]+"
    # Imgur direct links: alphanumeric id followed by an alphabetic extension.
    r"|i\.imgur\.com/[a-zA-Z0-9]+\.[a-zA-Z]+"
    # Any other URL containing a known image extension, optionally followed
    # by a query string.
    # NOTE(review): this alternative is not anchored at the end, so when used
    # with ``search`` a URL such as "https://host/a.pngx" still matches on the
    # "https://host/a.png" prefix -- confirm this leniency is intended.
    r"|[^\s]+\.(?:png|jpe?g|gif|webp|bmp)(?:\?[^\s]*)?"
    r")",
    re.IGNORECASE,
)



[docs]
def is_image_url(url: str) -> bool:
    """Report whether *url* contains a direct image link.

    The check is purely textual: the module-level ``_IMAGE_URL_PATTERN`` is
    searched for anywhere inside *url*, so surrounding text does not prevent
    a match.  No network request is made.

    Args:
        url: The string to inspect.

    Returns:
        ``True`` if the pattern is found in *url*, ``False`` otherwise.
    """
    found = _IMAGE_URL_PATTERN.search(url)
    return found is not None




[docs]
async def download_image_url(
    url: str,
) -> Optional[Dict[str, Any]]:
    """Download the image at *url*, enforcing the module's size cap.

    The URL is stripped of surrounding whitespace and given an ``https://``
    scheme when it does not already start with ``http://`` or ``https://``
    (compared case-insensitively).  A ``HEAD`` request is issued first so
    that non-image responses and responses whose declared ``Content-Length``
    exceeds ``_MAX_IMAGE_DOWNLOAD`` are rejected without fetching the body.
    The body is then fetched with ``GET`` and read in chunks; the download
    is abandoned as soon as more than ``_MAX_IMAGE_DOWNLOAD`` bytes have
    been received, so a missing or untruthful ``Content-Length`` cannot
    cause an unbounded amount of data to be buffered.

    Args:
        url: The image URL, with or without a scheme.

    Returns:
        A dict with keys ``"data"`` (the image bytes), ``"mimetype"`` and
        ``"url"`` (the normalised URL that was fetched), or ``None`` when
        the URL is empty, the ``HEAD`` response is not an image, the image
        is too large, the ``GET`` status is not 200, or any error occurs.
        Errors are logged and never propagated to the caller.
    """
    try:
        norm = url.strip()
        if not norm:
            # Nothing to fetch; avoid issuing a request for a bare scheme.
            return None
        # Require the full "scheme://" prefix, case-insensitively, so that
        # hosts such as "http.cat" still get a scheme and "HTTP://..." is
        # not turned into "https://HTTP://...".
        if not norm.lower().startswith(("http://", "https://")):
            norm = "https://" + norm
        async with aiohttp.ClientSession() as s:
            # Cheap pre-flight: check type and declared size before
            # committing to the (slower) body download.
            async with s.head(
                norm,
                timeout=aiohttp.ClientTimeout(total=5),
                headers={"User-Agent": "StargazerBot/1.0"},
                allow_redirects=True,
            ) as head_r:
                ct = head_r.headers.get("Content-Type", "")
                if not ct.startswith("image/"):
                    return None
                cl = head_r.headers.get("Content-Length")
                if cl and int(cl) > _MAX_IMAGE_DOWNLOAD:
                    logger.info(
                        "Image too large (%s bytes): %s", cl, norm,
                    )
                    return None

            async with s.get(
                norm,
                timeout=aiohttp.ClientTimeout(total=30),
                headers={"User-Agent": "StargazerBot/1.0"},
                allow_redirects=True,
            ) as r:
                if r.status != 200:
                    return None
                # Stream the body and stop once the cap is exceeded instead
                # of buffering an arbitrarily large response first.
                chunks = []
                received = 0
                async for chunk in r.content.iter_chunked(64 * 1024):
                    received += len(chunk)
                    if received > _MAX_IMAGE_DOWNLOAD:
                        logger.info(
                            "Image exceeded %s bytes while downloading: %s",
                            _MAX_IMAGE_DOWNLOAD, norm,
                        )
                        return None
                    chunks.append(chunk)
                data = b"".join(chunks)
                mimetype = r.content_type or ct or "image/png"
                return {
                    "data": data,
                    "mimetype": mimetype,
                    "url": norm,
                }
    except asyncio.TimeoutError:
        logger.error("Timeout downloading image from %s", url)
    except Exception:
        # Best-effort helper: log with traceback and fall through to None.
        logger.exception("Error downloading image from %s", url)
    return None