"""Direct image URL detection and download."""
from __future__ import annotations
import asyncio
import logging
import re
from typing import Any, Dict, Optional
import aiohttp
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# File suffixes regarded as images.  Not referenced by the code visible in
# this section; presumably used elsewhere -- verify before removing.
_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}

# Upper bound, in bytes, on an image body that download_image_url returns;
# anything larger makes it return None.
_MAX_IMAGE_DOWNLOAD = 20 * 1024 * 1024  # 20 MB

# Recognises direct image links.  Group 1 captures the scheme ("http://" or
# "https://"); group 2 captures the remainder, which must be one of the
# alternatives below.  The pattern is unanchored and case-insensitive.
_IMAGE_URL_PATTERN = re.compile(
    r"(https?://)"
    r"("
    # Discord CDN attachment link: everything up to the next whitespace.
    r"cdn\.discordapp\.com/attachments/[^\s]+"
    # Discord media-proxy attachment link, same shape as above.
    r"|media\.discordapp\.net/attachments/[^\s]+"
    # Imgur direct link: alphanumeric id followed by any alphabetic extension
    # (the extension is not restricted to the image suffixes).
    r"|i\.imgur\.com/[a-zA-Z0-9]+\.[a-zA-Z]+"
    # Any other URL containing an image extension, optionally followed by a
    # query string.  Nothing requires the extension to end the URL, so
    # "https://host/a.pngfoo" also matches.
    r"|[^\s]+\.(?:png|jpe?g|gif|webp|bmp)(?:\?[^\s]*)?"
    r")",
    re.IGNORECASE,
)
def is_image_url(url: str) -> bool:
    """Report whether *url* contains a recognised direct image link.

    The text is scanned with ``_IMAGE_URL_PATTERN.search``, so a match
    anywhere in the string counts; the link does not have to span the
    whole input.
    """
    found = _IMAGE_URL_PATTERN.search(url)
    # A match object is always truthy, so bool() maps "no match" (None) to
    # False and any match to True.
    return bool(found)
def _normalize_image_url(url: str) -> str:
    """Strip *url* and prefix ``https://`` when it lacks an HTTP(S) scheme.

    Returns an empty string when *url* is empty or only whitespace.
    """
    cleaned = url.strip()
    if not cleaned:
        return ""
    # Compare case-insensitively and against the full scheme so that
    # "HTTPS://host/x.png" is left untouched while a bare host such as
    # "httpbin.org/x.png" still receives a scheme.
    if cleaned.lower().startswith(("http://", "https://")):
        return cleaned
    return "https://" + cleaned


async def _read_capped(response: Any, limit: int) -> Optional[bytes]:
    """Read the response body in chunks; return ``None`` once it exceeds *limit* bytes."""
    body = bytearray()
    async for chunk in response.content.iter_chunked(64 * 1024):
        body.extend(chunk)
        if len(body) > limit:
            return None
    return bytes(body)


async def download_image_url(
    url: str,
) -> Optional[Dict[str, Any]]:
    """Download the image at *url* if it is an image of acceptable size.

    A ``HEAD`` request is issued first so that non-image or oversized
    resources are rejected without transferring the body; the body is then
    fetched with ``GET`` and read in chunks so it never grows beyond
    ``_MAX_IMAGE_DOWNLOAD`` bytes in memory.

    Args:
        url: Address of the image.  Surrounding whitespace is ignored and
            ``https://`` is prepended when no HTTP(S) scheme is present.

    Returns:
        A dict with keys ``"data"`` (the body as ``bytes``), ``"mimetype"``
        and ``"url"`` (the normalised request URL, not the post-redirect
        one), or ``None`` when *url* is blank, the resource is not an image,
        the ``GET`` status is not 200, the body exceeds
        ``_MAX_IMAGE_DOWNLOAD``, or the request times out or fails.  Errors
        are logged here and never raised to the caller.
    """
    try:
        norm = _normalize_image_url(url)
        if not norm:
            # Nothing to fetch; avoid issuing a request to "https://".
            return None

        request_headers = {"User-Agent": "StargazerBot/1.0"}
        # NOTE(review): the URL is fetched as given and redirects are
        # followed; if callers pass user-supplied URLs, confirm that reaching
        # internal hosts is acceptable here.
        async with aiohttp.ClientSession() as s:
            async with s.head(
                norm,
                timeout=aiohttp.ClientTimeout(total=5),
                headers=request_headers,
                allow_redirects=True,
            ) as head_r:
                # content_type is the media type without parameters, and is
                # "application/octet-stream" when the header is missing, so
                # a missing header is rejected just like a non-image one.
                head_type = head_r.content_type
                if not head_type.startswith("image/"):
                    return None
                declared = head_r.headers.get("Content-Length", "").strip()
                # A malformed Content-Length is ignored rather than treated
                # as an error; the chunked read below still enforces the cap.
                if declared.isdecimal() and int(declared) > _MAX_IMAGE_DOWNLOAD:
                    logger.info(
                        "Image too large (%s bytes): %s", declared, norm,
                    )
                    return None

            async with s.get(
                norm,
                timeout=aiohttp.ClientTimeout(total=30),
                headers=request_headers,
                allow_redirects=True,
            ) as r:
                if r.status != 200:
                    return None
                if "Content-Type" in r.headers:
                    mimetype = r.content_type
                    if not mimetype.startswith("image/"):
                        # GET disagrees with HEAD (e.g. an HTML error page).
                        return None
                else:
                    # No type on the GET response: fall back to the type the
                    # HEAD response reported, already verified as image/*.
                    mimetype = head_type
                data = await _read_capped(r, _MAX_IMAGE_DOWNLOAD)
                if data is None:
                    logger.info(
                        "Image exceeds %s bytes, aborted download: %s",
                        _MAX_IMAGE_DOWNLOAD, norm,
                    )
                    return None
                return {
                    "data": data,
                    "mimetype": mimetype,
                    "url": norm,
                }
    except asyncio.TimeoutError:
        logger.error("Timeout downloading image from %s", url)
    except Exception:
        # Best-effort helper: log with traceback and report failure as None.
        logger.exception("Error downloading image from %s", url)
    return None