Source code for url_utils.image

"""Direct image URL detection and download."""

from __future__ import annotations

import asyncio
import logging
import re
from typing import Any, Dict, Optional

import aiohttp

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# File extensions treated as images.
# NOTE(review): not referenced in the visible part of this module -- confirm
# it is used elsewhere before removing.
_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}
# Upper bound, in bytes, on an image download (20 * 1024 * 1024 = 20 MiB).
_MAX_IMAGE_DOWNLOAD = 20 * 1024 * 1024  # 20 MB

# Matches an http(s) URL that looks like a direct image link.  The pattern is
# compiled case-insensitively and has no end anchor, so when used with
# ``search`` it accepts a match anywhere in the input and tolerates trailing
# characters after the matched portion.
_IMAGE_URL_PATTERN = re.compile(
    # Group 1: the scheme, ``http://`` or ``https://``.
    r"(https?://)"
    # Group 2: one of the host/path alternatives below.
    r"("
    # Discord CDN attachment: any non-whitespace run after ``/attachments/``.
    r"cdn\.discordapp\.com/attachments/[^\s]+"
    # Discord media proxy attachment, same shape as above.
    r"|media\.discordapp\.net/attachments/[^\s]+"
    # Imgur direct link: alphanumeric id, a dot, then an alphabetic extension.
    r"|i\.imgur\.com/[a-zA-Z0-9]+\.[a-zA-Z]+"
    # Any other URL containing a known image extension, optionally followed
    # by a query string.
    r"|[^\s]+\.(?:png|jpe?g|gif|webp|bmp)(?:\?[^\s]*)?"
    r")",
    re.IGNORECASE,
)


def is_image_url(url: str) -> bool:
    """Return ``True`` if *url* contains something that looks like an image URL.

    The check is purely textual: *url* is searched with
    ``_IMAGE_URL_PATTERN`` and no network request is made.
    """
    # ``re.Match`` objects are always truthy, so ``bool`` is equivalent to
    # comparing the search result against ``None``.
    found = _IMAGE_URL_PATTERN.search(url)
    return bool(found)
async def _read_capped(
    response: "aiohttp.ClientResponse",
    limit: int,
) -> Optional[bytes]:
    """Read the body of *response*, giving up once it exceeds *limit* bytes.

    Returns the body as ``bytes``, or ``None`` if more than *limit* bytes
    were received.
    """
    buf = bytearray()
    # Stream in chunks so an oversized (or mis-declared) body is abandoned
    # early instead of being buffered in full.
    async for chunk in response.content.iter_chunked(64 * 1024):
        buf.extend(chunk)
        if len(buf) > limit:
            return None
    return bytes(buf)


async def download_image_url(
    url: str,
) -> Optional[Dict[str, Any]]:
    """Download the image at *url* if the server reports an image content type.

    A ``HEAD`` request is made first to check the ``Content-Type`` and the
    declared ``Content-Length``; only then is the body fetched with ``GET``.
    The body is streamed and abandoned as soon as it exceeds
    ``_MAX_IMAGE_DOWNLOAD`` bytes, so the limit holds even when the server
    omits or misreports ``Content-Length``.

    Args:
        url: The URL to fetch.  Surrounding whitespace is stripped, and
            ``https://`` is prepended when no ``http://`` / ``https://``
            scheme is present.

    Returns:
        A dict with keys ``"data"`` (the raw bytes), ``"mimetype"`` and
        ``"url"`` (the normalized URL), or ``None`` when *url* is blank, the
        resource is not an image, it is too large, the ``GET`` status is not
        200, or any error/timeout occurs.  Errors are logged, never raised.
    """
    try:
        norm = url.strip()
        if not norm:
            # Nothing to fetch; avoid opening a session for a blank URL.
            return None
        # Compare against full scheme prefixes (case-insensitively) so hosts
        # that merely begin with "http" still get a scheme added.
        if not norm.lower().startswith(("http://", "https://")):
            norm = "https://" + norm

        request_headers = {"User-Agent": "StargazerBot/1.0"}
        async with aiohttp.ClientSession() as s:
            async with s.head(
                norm,
                timeout=aiohttp.ClientTimeout(total=5),
                headers=request_headers,
                allow_redirects=True,
            ) as head_r:
                ct = head_r.headers.get("Content-Type", "")
                # Media types are case-insensitive.
                if not ct.lower().startswith("image/"):
                    return None
                cl = head_r.headers.get("Content-Length")
                try:
                    declared = int(cl) if cl else None
                except ValueError:
                    # Malformed header: treat the size as unknown and rely on
                    # the streaming cap below.
                    declared = None
                if declared is not None and declared > _MAX_IMAGE_DOWNLOAD:
                    logger.info(
                        "Image too large (%s bytes): %s",
                        cl,
                        norm,
                    )
                    return None

            async with s.get(
                norm,
                timeout=aiohttp.ClientTimeout(total=30),
                headers=request_headers,
                allow_redirects=True,
            ) as r:
                if r.status != 200:
                    return None
                data = await _read_capped(r, _MAX_IMAGE_DOWNLOAD)
                if data is None:
                    return None
                # ``content_type`` defaults to "application/octet-stream"
                # when the header is absent, so only trust it when the header
                # was actually sent; otherwise reuse the HEAD media type.
                got = r.content_type if "Content-Type" in r.headers else ""
                head_type = ct.split(";", 1)[0].strip().lower()
                mimetype = got or head_type or "image/png"
                return {
                    "data": data,
                    "mimetype": mimetype,
                    "url": norm,
                }
    except asyncio.TimeoutError:
        logger.error("Timeout downloading image from %s", url)
    except Exception:
        # Best-effort helper: log with traceback and report failure as None.
        logger.exception("Error downloading image from %s", url)
    return None