# Source code for url_utils.ytdlp_json

"""yt-dlp extractor detection and JSON parsing helpers."""

from __future__ import annotations

import json
import logging

logger = logging.getLogger(__name__)

_ytdlp_extractors: list | None = None


def _get_ytdlp_extractors() -> list:
    global _ytdlp_extractors
    if _ytdlp_extractors is not None:
        return _ytdlp_extractors
    try:
        import yt_dlp
        _ytdlp_extractors = [
            e for e in yt_dlp.extractor.gen_extractors()
            if e.IE_NAME != "generic"
        ]
    except ImportError:
        logger.warning(
            "yt-dlp not installed — is_ytdlp_supported_url will always return False",
        )
        _ytdlp_extractors = []
    return _ytdlp_extractors


def is_ytdlp_supported_url(url: str) -> bool:
    """Report whether a dedicated (non-generic) yt-dlp extractor accepts *url*.

    Args:
        url: Candidate URL.  Falsy values and strings that do not start with
            ``"http"`` are rejected without consulting any extractor.

    Returns:
        ``True`` if at least one extractor returned by
        ``_get_ytdlp_extractors()`` reports ``suitable(url)``; ``False``
        otherwise (which is always the case when that list is empty).
    """
    if not url or not url.startswith("http"):
        return False
    # any() short-circuits on the first matching extractor.
    return any(ie.suitable(url) for ie in _get_ytdlp_extractors())
# Network-related yt-dlp command-line arguments, kept as a flat argv fragment
# (each option name is immediately followed by its value where it takes one).
YTDLP_METADATA_NETWORK_ARGS: tuple[str, ...] = (
    "--force-ipv4",
    # Socket timeout value passed to yt-dlp.
    "--socket-timeout",
    "12",
    # Retry count for downloads.
    "--retries",
    "1",
    # Retry count for extractor errors.
    "--extractor-retries",
    "1",
)
def raise_process_file_limit(*, soft: int = 262_144) -> None:
    """Best-effort raise of this process's soft ``RLIMIT_NOFILE``.

    The soft limit is only ever increased, never lowered, and never pushed
    above the hard limit.  Every failure mode is swallowed on purpose: callers
    use this as an optional tuning step, not a requirement.

    Args:
        soft: Desired minimum soft limit for open file descriptors.
    """
    try:
        import resource
    except ImportError:
        # The ``resource`` module is not available on every platform
        # (e.g. Windows); there is nothing to tune in that case.
        return

    try:
        cur_soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        unlimited = resource.RLIM_INFINITY
        if cur_soft == unlimited:
            # Already unlimited.  RLIM_INFINITY can be reported as -1, so a
            # plain numeric comparison would "raise" it down to ``soft``.
            return
        target = max(soft, cur_soft)
        if hard != unlimited:
            # Only clamp to a finite hard limit; an unlimited one (possibly
            # -1) must not win the min() and block the raise.
            target = min(target, hard)
        if target > cur_soft:
            resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
    except (ValueError, OSError):
        # The OS refused the new limit; keep whatever limit is in effect.
        pass
def _first_json_dict_in_string(s: str) -> dict | None: decoder = json.JSONDecoder() s = s.strip() if not s: return None for i, c in enumerate(s): if c != "{": continue try: obj, _end = decoder.raw_decode(s, i) if isinstance(obj, dict): return obj except json.JSONDecodeError: continue return None
[docs] def parse_ytdlp_dump_json_stdout(stdout: bytes | str) -> dict | None: if isinstance(stdout, bytes): text = stdout.decode("utf-8", errors="replace") else: text = stdout text = text.strip() if text.startswith("\ufeff"): text = text.lstrip("\ufeff").strip() if not text: return None got = _first_json_dict_in_string(text) if got is not None: return got for line in text.splitlines(): got = _first_json_dict_in_string(line) if got is not None: return got return None
[docs] def iter_ytdlp_extractor_test_urls() -> list[tuple[str, str]]: try: from yt_dlp.extractor import gen_extractors except ImportError: return [] seen: set[str] = set() out: list[tuple[str, str]] = [] for ie in gen_extractors(): name = ie.IE_NAME for t in getattr(ie, "_TESTS", None) or []: url: str | None = None if isinstance(t, dict): u = t.get("url") if isinstance(u, str) and u.startswith("http"): url = u elif isinstance(t, str) and t.startswith("http"): url = t if url and url not in seen: seen.add(url) out.append((name, url)) return out