url_utils

URL detection, content fetching, and metadata caching utilities.

class url_utils.URLMetadataCache(ttl=None)[source]

Bases: object

In-memory TTL cache for URL metadata (default TTL: 24 hours, i.e. DEFAULT_TTL = 86400 seconds).

Parameters:

ttl (int | None)

DEFAULT_TTL = 86400

get(url)[source]
Return type:

Any | None

Parameters:

url (str)

set(url, data)[source]
Return type:

None

Parameters:

url (str)

data (Any)

clear_expired()[source]
Return type:

int

url_utils.detect_crypto_mentions(text)[source]
Return type:

List[tuple]

Parameters:

text (str)

async url_utils.download_image_url(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_arxiv_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_bluesky_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_crypto_prices(pairs)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

pairs (List[tuple])

async url_utils.get_github_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_giphy_media_url(giphy_url)[source]
Return type:

Optional[str]

Parameters:

giphy_url (str)

async url_utils.get_gist_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_nvd_cve_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_paste_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_reddit_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_soundcloud_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_spotify_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_stackoverflow_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_tenor_media_url(tenor_url)[source]
Return type:

Optional[str]

Parameters:

tenor_url (str)

async url_utils.get_tiktok_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_tweet_content(tweet_url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

tweet_url (str)

url_utils.get_url_cache()[source]
Return type:

URLMetadataCache

async url_utils.get_vimeo_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_wikipedia_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

async url_utils.get_youtube_content(url)[source]
Return type:

Optional[Dict[str, Any]]

Parameters:

url (str)

url_utils.is_arxiv_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_bluesky_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_gist_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_github_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_giphy_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_image_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_nvd_cve_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_paste_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_reddit_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_soundcloud_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_spotify_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_stackoverflow_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_tenor_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_tiktok_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_tweet_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_vimeo_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_wikipedia_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_youtube_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.is_ytdlp_supported_url(url)[source]
Return type:

bool

Parameters:

url (str)

url_utils.iter_ytdlp_extractor_test_urls()[source]
Return type:

list[tuple[str, str]]

url_utils.parse_ytdlp_dump_json_stdout(stdout)[source]
Return type:

dict | None

Parameters:

stdout (bytes | str)

url_utils.raise_process_file_limit(*, soft=262144)[source]
Return type:

None

Parameters:

soft (int)