"""oEmbed and HTML-scraping fetchers for video/audio/GIF platforms."""
from __future__ import annotations
import asyncio
import logging
import re
from typing import Any, Dict, Optional
import aiohttp
from .cache import get_url_cache
from .fetch_common import _TIMEOUT
logger = logging.getLogger(__name__)
[docs]
async def get_youtube_content(url: str) -> Optional[Dict[str, Any]]:
    """Fetch metadata for a YouTube URL from YouTube's oEmbed endpoint.

    A previously stored result is looked up in the URL cache under the
    key ``"youtube:<url>"`` and returned as-is when present; a freshly
    fetched result is stored under the same key.

    Args:
        url: The YouTube URL. Surrounding whitespace is ignored and
            ``https://`` is prepended when no ``http(s)://`` scheme is
            present.

    Returns:
        A dict with the keys ``title``, ``channel_name``, ``channel_url``,
        ``is_shorts`` and ``thumbnail_url``, or ``None`` when the URL is
        blank, the endpoint answers with a non-200 status, the request
        times out, or any other error occurs (errors are logged, never
        raised).
    """
    from urllib.parse import quote

    cache = get_url_cache()
    key = f"youtube:{url}"
    cached = cache.get(key)
    if cached is not None:
        return cached
    try:
        norm = url.strip()
        if not norm:
            # Nothing to look up; avoid a pointless network round-trip.
            return None
        # Match the scheme case-insensitively and require the "://" part:
        # a bare startswith("http") would double-prefix "HTTPS://..." and
        # would skip prefixing hosts that merely begin with "http".
        if not norm.lower().startswith(("http://", "https://")):
            norm = "https://" + norm
        is_shorts = "/shorts/" in norm.lower()
        # The target URL is fully percent-encoded so its own query string
        # cannot leak into the oEmbed request's parameters.
        oembed = (
            f"https://www.youtube.com/oembed?url={quote(norm, safe='')}"
            "&format=json"
        )
        async with aiohttp.ClientSession() as s:
            async with s.get(oembed, timeout=_TIMEOUT) as r:
                if r.status != 200:
                    return None
                d = await r.json()
        result = {
            "title": d.get("title", "Unknown Title"),
            "channel_name": d.get("author_name", "Unknown Channel"),
            "channel_url": d.get("author_url", ""),
            "is_shorts": is_shorts,
            "thumbnail_url": d.get("thumbnail_url", ""),
        }
        cache.set(key, result)
        return result
    except asyncio.TimeoutError:
        logger.error("Timeout fetching YouTube metadata from %s", url)
    except Exception:
        # Best-effort fetcher: log with traceback and fall through to None.
        logger.exception("Error fetching YouTube metadata from %s", url)
    return None
[docs]
async def get_spotify_content(url: str) -> Optional[Dict[str, Any]]:
    """Fetch metadata for a Spotify URL from Spotify's oEmbed endpoint.

    A previously stored result is looked up in the URL cache under the
    key ``"spotify:<url>"`` and returned as-is when present; a freshly
    fetched result is stored under the same key.

    Args:
        url: The Spotify URL. Surrounding whitespace is ignored and
            ``https://`` is prepended when no ``http(s)://`` scheme is
            present.

    Returns:
        A dict with the keys ``title``, ``type`` and ``thumbnail_url``,
        or ``None`` when the URL is blank, the endpoint answers with a
        non-200 status, the request times out, or any other error occurs
        (errors are logged, never raised). ``type`` is derived from the
        URL itself, not from the oEmbed response, and defaults to
        ``"track"``.
    """
    from urllib.parse import quote

    cache = get_url_cache()
    key = f"spotify:{url}"
    cached = cache.get(key)
    if cached is not None:
        return cached
    try:
        norm = url.strip()
        if not norm:
            # Nothing to look up; avoid a pointless network round-trip.
            return None
        # Match the scheme case-insensitively and require the "://" part:
        # a bare startswith("http") would double-prefix "HTTPS://..." and
        # would skip prefixing hosts that merely begin with "http".
        if not norm.lower().startswith(("http://", "https://")):
            norm = "https://" + norm
        # First content kind (in this fixed order) whose "/<kind>/" segment
        # appears in the URL wins; fall back to "track".
        kinds = ("track", "album", "playlist", "episode", "artist", "show")
        ctype = next((t for t in kinds if f"/{t}/" in norm), "track")
        oembed = f"https://open.spotify.com/oembed?url={quote(norm, safe='')}"
        async with aiohttp.ClientSession() as s:
            async with s.get(oembed, timeout=_TIMEOUT) as r:
                if r.status != 200:
                    return None
                d = await r.json()
        result = {
            "title": d.get("title", "Unknown"),
            "type": ctype,
            "thumbnail_url": d.get("thumbnail_url", ""),
        }
        cache.set(key, result)
        return result
    except asyncio.TimeoutError:
        logger.error("Timeout fetching Spotify metadata from %s", url)
    except Exception:
        # Best-effort fetcher: log with traceback and fall through to None.
        logger.exception("Error fetching Spotify metadata from %s", url)
    return None
[docs]
async def get_soundcloud_content(url: str) -> Optional[Dict[str, Any]]:
    """Fetch metadata for a SoundCloud URL from SoundCloud's oEmbed endpoint.

    A previously stored result is looked up in the URL cache under the
    key ``"soundcloud:<url>"`` and returned as-is when present; a freshly
    fetched result is stored under the same key.

    Args:
        url: The SoundCloud URL. Surrounding whitespace is ignored and
            ``https://`` is prepended when no ``http(s)://`` scheme is
            present.

    Returns:
        A dict with the keys ``title``, ``author_name``, ``description``
        and ``thumbnail_url``, or ``None`` when the URL is blank, the
        endpoint answers with a non-200 status, the request times out, or
        any other error occurs (errors are logged, never raised).
    """
    from urllib.parse import quote

    cache = get_url_cache()
    key = f"soundcloud:{url}"
    cached = cache.get(key)
    if cached is not None:
        return cached
    try:
        norm = url.strip()
        if not norm:
            # Nothing to look up; avoid a pointless network round-trip.
            return None
        # Match the scheme case-insensitively and require the "://" part:
        # a bare startswith("http") would double-prefix "HTTPS://..." and
        # would skip prefixing hosts that merely begin with "http".
        if not norm.lower().startswith(("http://", "https://")):
            norm = "https://" + norm
        # The target URL is fully percent-encoded so its own query string
        # cannot leak into the oEmbed request's parameters.
        oembed = (
            f"https://soundcloud.com/oembed?url={quote(norm, safe='')}"
            "&format=json"
        )
        async with aiohttp.ClientSession() as s:
            async with s.get(oembed, timeout=_TIMEOUT) as r:
                if r.status != 200:
                    return None
                d = await r.json()
        result = {
            "title": d.get("title", "Unknown"),
            "author_name": d.get("author_name", "Unknown Artist"),
            "description": d.get("description", ""),
            "thumbnail_url": d.get("thumbnail_url", ""),
        }
        cache.set(key, result)
        return result
    except asyncio.TimeoutError:
        logger.error("Timeout fetching SoundCloud metadata from %s", url)
    except Exception:
        # Best-effort fetcher: log with traceback and fall through to None.
        logger.exception("Error fetching SoundCloud metadata from %s", url)
    return None
[docs]
async def get_tiktok_content(url: str) -> Optional[Dict[str, Any]]:
    """Fetch metadata for a TikTok URL from TikTok's oEmbed endpoint.

    A previously stored result is looked up in the URL cache under the
    key ``"tiktok:<url>"`` and returned as-is when present; a freshly
    fetched result is stored under the same key.

    Args:
        url: The TikTok URL. Surrounding whitespace is ignored and
            ``https://`` is prepended when no ``http(s)://`` scheme is
            present.

    Returns:
        A dict with the keys ``title``, ``author_name``, ``author_url``
        and ``thumbnail_url``, or ``None`` when the URL is blank, the
        endpoint answers with a non-200 status, the request times out, or
        any other error occurs (errors are logged, never raised).
    """
    from urllib.parse import quote

    cache = get_url_cache()
    key = f"tiktok:{url}"
    cached = cache.get(key)
    if cached is not None:
        return cached
    try:
        norm = url.strip()
        if not norm:
            # Nothing to look up; avoid a pointless network round-trip.
            return None
        # Match the scheme case-insensitively and require the "://" part:
        # a bare startswith("http") would double-prefix "HTTPS://..." and
        # would skip prefixing hosts that merely begin with "http".
        if not norm.lower().startswith(("http://", "https://")):
            norm = "https://" + norm
        oembed = f"https://www.tiktok.com/oembed?url={quote(norm, safe='')}"
        async with aiohttp.ClientSession() as s:
            async with s.get(oembed, timeout=_TIMEOUT) as r:
                if r.status != 200:
                    return None
                d = await r.json()
        result = {
            "title": d.get("title", "TikTok Video"),
            "author_name": d.get("author_name", "Unknown"),
            "author_url": d.get("author_url", ""),
            "thumbnail_url": d.get("thumbnail_url", ""),
        }
        cache.set(key, result)
        return result
    except asyncio.TimeoutError:
        logger.error("Timeout fetching TikTok metadata from %s", url)
    except Exception:
        # Best-effort fetcher: log with traceback and fall through to None.
        logger.exception("Error fetching TikTok metadata from %s", url)
    return None
[docs]
async def get_vimeo_content(url: str) -> Optional[Dict[str, Any]]:
    """Fetch metadata for a Vimeo URL from Vimeo's oEmbed endpoint.

    A previously stored result is looked up in the URL cache under the
    key ``"vimeo:<url>"`` and returned as-is when present; a freshly
    fetched result is stored under the same key.

    Args:
        url: The Vimeo URL. Surrounding whitespace is ignored and
            ``https://`` is prepended when no ``http(s)://`` scheme is
            present.

    Returns:
        A dict with the keys ``title``, ``author_name``, ``description``,
        ``duration`` and ``thumbnail_url``, or ``None`` when the URL is
        blank, the endpoint answers with a non-200 status, the request
        times out, or any other error occurs (errors are logged, never
        raised).
    """
    from urllib.parse import quote

    cache = get_url_cache()
    key = f"vimeo:{url}"
    cached = cache.get(key)
    if cached is not None:
        return cached
    try:
        norm = url.strip()
        if not norm:
            # Nothing to look up; avoid a pointless network round-trip.
            return None
        # Match the scheme case-insensitively and require the "://" part:
        # a bare startswith("http") would double-prefix "HTTPS://..." and
        # would skip prefixing hosts that merely begin with "http".
        if not norm.lower().startswith(("http://", "https://")):
            norm = "https://" + norm
        oembed = (
            f"https://vimeo.com/api/oembed.json?url={quote(norm, safe='')}"
        )
        async with aiohttp.ClientSession() as s:
            async with s.get(oembed, timeout=_TIMEOUT) as r:
                if r.status != 200:
                    return None
                d = await r.json()
        result = {
            "title": d.get("title", "Unknown"),
            "author_name": d.get("author_name", "Unknown"),
            "description": d.get("description", ""),
            "duration": d.get("duration", 0),
            "thumbnail_url": d.get("thumbnail_url", ""),
        }
        cache.set(key, result)
        return result
    except asyncio.TimeoutError:
        logger.error("Timeout fetching Vimeo metadata from %s", url)
    except Exception:
        # Best-effort fetcher: log with traceback and fall through to None.
        logger.exception("Error fetching Vimeo metadata from %s", url)
    return None