"""yt-dlp extractor detection and JSON parsing helpers."""
from __future__ import annotations
import json
import logging
logger = logging.getLogger(__name__)
_ytdlp_extractors: list | None = None
def _get_ytdlp_extractors() -> list:
global _ytdlp_extractors
if _ytdlp_extractors is not None:
return _ytdlp_extractors
try:
import yt_dlp
_ytdlp_extractors = [
e for e in yt_dlp.extractor.gen_extractors()
if e.IE_NAME != "generic"
]
except ImportError:
logger.warning(
"yt-dlp not installed — is_ytdlp_supported_url will always return False",
)
_ytdlp_extractors = []
return _ytdlp_extractors
def is_ytdlp_supported_url(url: str) -> bool:
    """Return True if *url* is claimed by a non-generic yt-dlp extractor.

    Args:
        url: Candidate URL; anything that is empty or not an http(s) URL
            is rejected without consulting the extractor list.

    Returns:
        True when at least one cached yt-dlp extractor reports the URL as
        suitable; always False when yt-dlp is not installed.
    """
    # Pin the full scheme: a bare "http" prefix check would also accept
    # strings such as "httpfoo" that are not URLs at all.
    if not url or not url.startswith(("http://", "https://")):
        return False
    return any(extractor.suitable(url) for extractor in _get_ytdlp_extractors())
# CLI flags appended to yt-dlp metadata-probe invocations: force IPv4,
# a 12-second socket timeout, and a single retry for both downloads and
# extractor requests, so a dead host fails fast instead of hanging.
YTDLP_METADATA_NETWORK_ARGS: tuple[str, ...] = (
    "--force-ipv4",
    "--socket-timeout",
    "12",
    "--retries",
    "1",
    "--extractor-retries",
    "1",
)
def raise_process_file_limit(*, soft: int = 262_144) -> None:
    """Best-effort raise of the RLIMIT_NOFILE soft limit to at least *soft*.

    Never raises: on non-POSIX platforms (no ``resource`` module) or when
    the kernel refuses the new limit, the call is a silent no-op.  The
    existing soft limit is never lowered, and the hard limit is respected.

    Args:
        soft: Desired soft limit for open file descriptors (keyword-only).
    """
    try:
        import resource
    except ImportError:
        # ``resource`` is POSIX-only; on Windows this function is a no-op.
        # (Previously ImportError escaped the except clause and crashed.)
        return
    try:
        cur_soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        if hard == resource.RLIM_INFINITY:
            # RLIM_INFINITY is -1 on Linux, so min() against it would
            # clamp the target below cur_soft and the limit would never
            # be raised; with no hard cap, just take the larger soft value.
            target = max(soft, cur_soft)
        else:
            target = min(max(soft, cur_soft), hard)
        if target > cur_soft:
            resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
    except (ValueError, OSError):
        # Insufficient privileges or platform quirks — never fail the caller.
        pass
def _first_json_dict_in_string(s: str) -> dict | None:
decoder = json.JSONDecoder()
s = s.strip()
if not s:
return None
for i, c in enumerate(s):
if c != "{":
continue
try:
obj, _end = decoder.raw_decode(s, i)
if isinstance(obj, dict):
return obj
except json.JSONDecodeError:
continue
return None
def parse_ytdlp_dump_json_stdout(stdout: bytes | str) -> dict | None:
    """Extract the first JSON object from yt-dlp ``--dump-json`` output.

    Args:
        stdout: Raw process output; bytes are decoded as UTF-8 with
            undecodable sequences replaced.

    Returns:
        The first JSON object found in the output, or None when the
        output is empty or contains no decodable object.
    """
    if isinstance(stdout, bytes):
        text = stdout.decode("utf-8", errors="replace")
    else:
        text = stdout
    text = text.strip()
    # Strip a UTF-8 BOM if present before attempting to parse.
    if text.startswith("\ufeff"):
        text = text.lstrip("\ufeff").strip()
    if not text:
        return None
    got = _first_json_dict_in_string(text)
    if got is not None:
        return got
    # Fall back to a per-line scan in case interleaved diagnostics broke
    # the whole-text scan.
    for line in text.splitlines():
        got = _first_json_dict_in_string(line)
        if got is not None:
            return got
    return None