"""URL pattern detection helpers."""
from __future__ import annotations
import re
[docs]
def is_youtube_url(url: str) -> bool:
    """Return True if *url* is a YouTube video link carrying an 11-character ID.

    Two shapes are recognised, both with an optional ``http(s)://`` scheme
    and both anchored at the start of the string:

    * ``youtu.be/<id>`` short links;
    * ``youtube.com`` / ``youtube-nocookie.com`` links (optionally prefixed
      with ``www.`` or ``m.``) using ``watch?v=``, ``embed/``, ``v/`` or
      ``shorts/``.

    After the ID the string must either end or continue with a query
    string, an additional parameter, or a fragment.

    Args:
        url: Candidate URL string.

    Returns:
        True when *url* matches one of the shapes above, False otherwise.
    """
    # What may follow the video ID. The previous pattern only allowed "?",
    # which rejected the very common "watch?v=<id>&t=42s" / "&list=..."
    # links, because after "?v=" further parameters start with "&".
    tail = r"([?&#].*)?$"
    short_link = r"(https?://)?(youtu\.be)/([a-zA-Z0-9_-]{11})" + tail
    long_link = (
        r"(https?://)?(www\.|m\.)?(youtube|youtube-nocookie)\.(com)/"
        r"(watch\?v=|embed/|v/|shorts/)([a-zA-Z0-9_-]{11})" + tail
    )
    # Try both shapes rather than branching on a "youtu.be/" substring, so a
    # long link whose query merely mentions "youtu.be/" is still recognised.
    return any(
        re.match(pattern, url) is not None
        for pattern in (short_link, long_link)
    )
[docs]
def is_tenor_url(url: str) -> bool:
    """Return True if *url* contains a link to ``tenor.com`` or a subdomain.

    The pattern is searched anywhere in the string, so a link embedded in
    surrounding text is also detected. The scheme is optional and a ``/``
    must follow the host.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when a ``tenor.com/`` host is found, False otherwise.
    """
    # re.search has no implicit anchor, so without a left boundary the
    # optional prefixes are no-ops and "nottenor.com/" would match. The
    # lookbehind rejects hosts that merely END in "tenor.com" while still
    # letting real subdomains (preceded by ".") through.
    return (
        re.search(r"(?<![\w-])(https?://)?(www\.|media\.)?tenor\.com/", url)
        is not None
    )
[docs]
def is_giphy_url(url: str) -> bool:
    """Return True if *url* contains a Giphy link.

    Recognised hosts are ``giphy.com`` (including subdomains such as
    ``media.giphy.com`` / ``media2.giphy.com``) and the ``gph.is``
    shortener. The pattern is searched anywhere in the string and a ``/``
    must follow the host.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when a Giphy host is found, False otherwise.
    """
    # The lookbehind supplies the left boundary that re.search lacks, so
    # look-alike hosts such as "notgiphy.com/" or "fakegph.is/" are rejected.
    giphy_pattern = (
        r"(?<![\w-])(https?://)?(www\.)?"
        r"(giphy\.com/|gph\.is/|media\d*\.giphy\.com/)"
    )
    return re.search(giphy_pattern, url) is not None
[docs]
def is_spotify_url(url: str) -> bool:
    """Return True if *url* contains a Spotify content link.

    Matches ``spotify.com/<kind>/<id>`` where ``<kind>`` is one of track,
    album, playlist, episode, artist or show and ``<id>`` starts with at
    least one ASCII alphanumeric character. The pattern is searched
    anywhere in the string; the scheme and ``open.`` prefix are optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary: re.search would otherwise accept any host that merely
    # ends in "spotify.com" (e.g. "notspotify.com").
    spotify_pattern = (
        r"(?<![\w-])(https?://)?(open\.)?spotify\.com/"
        r"(track|album|playlist|episode|artist|show)/[a-zA-Z0-9]+"
    )
    return re.search(spotify_pattern, url) is not None
[docs]
def is_soundcloud_url(url: str) -> bool:
    """Return True if *url* contains a SoundCloud link with two path segments.

    Matches ``soundcloud.com/<segment>/<segment>`` where both segments are
    non-empty; a single-segment link does not match. The pattern is
    searched anywhere in the string.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary so that hosts merely ending in "soundcloud.com"
    # (e.g. "fakesoundcloud.com") are not accepted; subdomains still are.
    found = re.search(
        r"(?<![\w-])(https?://)?(www\.)?soundcloud\.com/[^/]+/[^/]+", url,
    )
    return found is not None
[docs]
def is_tiktok_url(url: str) -> bool:
    """Return True if *url* contains a TikTok link.

    Two path shapes are accepted after ``tiktok.com/``:

    * ``@<user>/video/<digits>``;
    * a run of ASCII alphanumerics, optionally followed by ``/``.

    The pattern is searched anywhere in the string; the scheme and the
    ``www.`` / ``vm.`` prefixes are optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary: without it re.search also accepts look-alike hosts
    # such as "nottiktok.com".
    tiktok_pattern = (
        r"(?<![\w-])(https?://)?(www\.|vm\.)?tiktok\.com/"
        r"(@[^/]+/video/\d+|[a-zA-Z0-9]+/?)"
    )
    return re.search(tiktok_pattern, url) is not None
[docs]
def is_vimeo_url(url: str) -> bool:
    """Return True if *url* contains a Vimeo link to a numeric video ID.

    Matches ``vimeo.com/<digits>`` and ``vimeo.com/video/<digits>``. The
    pattern is searched anywhere in the string; the scheme and the
    ``www.`` / ``player.`` prefixes are optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary so "notvimeo.com/123" is not mistaken for Vimeo.
    found = re.search(
        r"(?<![\w-])(https?://)?(www\.|player\.)?vimeo\.com/(video/)?\d+",
        url,
    )
    return found is not None
[docs]
def is_github_url(url: str) -> bool:
    """Return True if *url* contains a ``github.com/<owner>/<repo>`` link.

    Both path segments must be non-empty; a trailing ``/issues/<n>`` or
    ``/pull/<n>`` is optional. Only the bare host or ``www.github.com`` is
    accepted. The pattern is searched anywhere in the string and the scheme
    is optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # The lookbehind also excludes "." so that other hosts under github.com
    # are not reported as repository links: in particular
    # "gist.github.com/<user>/<id>" has the same two-segment shape and is
    # covered by a separate gist check. It likewise rejects look-alike
    # hosts such as "notgithub.com".
    github_pattern = (
        r"(?<![\w.-])(https?://)?(www\.)?github\.com/[^/]+/[^/]+"
        r"(/issues/\d+|/pull/\d+)?"
    )
    return re.search(github_pattern, url) is not None
[docs]
def is_arxiv_url(url: str) -> bool:
    """Return True if *url* contains an arXiv abstract or PDF link.

    Matches ``arxiv.org/abs/<id>`` and ``arxiv.org/pdf/<id>`` where
    ``<id>`` begins with digits and dots, optionally followed by a ``v<n>``
    version suffix. The pattern is searched anywhere in the string; the
    scheme and ``www.`` prefix are optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary so a host merely ending in "arxiv.org" is rejected,
    # while subdomains (preceded by ".") still match.
    found = re.search(
        r"(?<![\w-])(https?://)?(www\.)?arxiv\.org/(abs|pdf)/[\d\.]+v?\d*",
        url,
    )
    return found is not None
[docs]
def is_reddit_url(url: str) -> bool:
    """Return True if *url* contains a Reddit post (comments) link.

    Matches ``reddit.com/r/<subreddit>/comments/<id>`` where ``<id>`` is
    lowercase alphanumeric. The pattern is searched anywhere in the string;
    the scheme and the ``www.`` / ``old.`` / ``new.`` prefixes are optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary so look-alike hosts such as "notreddit.com" are
    # rejected; other subdomains (preceded by ".") keep matching.
    reddit_pattern = (
        r"(?<![\w-])(https?://)?(www\.|old\.|new\.)?reddit\.com/r/[^/]+/"
        r"comments/[a-z0-9]+"
    )
    return re.search(reddit_pattern, url) is not None
[docs]
def is_wikipedia_url(url: str) -> bool:
    """Return True if *url* contains a Wikipedia article link.

    Matches ``wikipedia.org/wiki/<title>`` where ``<title>`` starts with at
    least one character that is neither ``#`` nor whitespace. The pattern
    is searched anywhere in the string; the scheme and a language
    subdomain are optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary so "fakewikipedia.org/wiki/..." is rejected; language
    # and mobile subdomains are preceded by "." and still match.
    found = re.search(
        r"(?<![\w-])(https?://)?([a-z]{2,3}\.)?wikipedia\.org/wiki/[^#\s]+",
        url,
    )
    return found is not None
[docs]
def is_gist_url(url: str) -> bool:
    """Return True if *url* contains a ``gist.github.com`` link.

    The host must be followed by an optional ``<user>/`` segment and then
    at least one lowercase hexadecimal character. The pattern is searched
    anywhere in the string and the scheme is optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    gist_pattern = re.compile(
        r"(https?://)?gist\.github\.com/([^/]+/)?[a-f0-9]+"
    )
    found = gist_pattern.search(url)
    return found is not None
[docs]
def is_bluesky_url(url: str) -> bool:
    """Return True if *url* contains a Bluesky post link.

    Matches ``bsky.app/profile/<handle>/post/<id>`` where ``<handle>`` is a
    non-empty segment and ``<id>`` is lowercase alphanumeric. The pattern
    is searched anywhere in the string and the scheme is optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary so hosts merely ending in "bsky.app" (for example
    # "notbsky.app") are rejected.
    found = re.search(
        r"(?<![\w-])(https?://)?bsky\.app/profile/[^/]+/post/[a-z0-9]+", url,
    )
    return found is not None
[docs]
def is_stackoverflow_url(url: str) -> bool:
    """Return True if *url* contains a Stack Exchange network question link.

    Recognised hosts are ``stackoverflow.com``, ``superuser.com``,
    ``serverfault.com``, ``askubuntu.com`` and ``<site>.stackexchange.com``;
    the host must be followed by ``/questions/<digits>``. The pattern is
    searched anywhere in the string; the scheme and ``www.`` are optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    # Left boundary so look-alike hosts such as "notstackoverflow.com" are
    # rejected. Because the match can no longer start in the middle of a
    # host label, the stackexchange subdomain class also admits digits and
    # hyphens, keeping sites like "3dprinting.stackexchange.com" matching.
    question_pattern = (
        r"(?<![\w-])(https?://)?(www\.)?"
        r"(stackoverflow\.com|superuser\.com|serverfault\.com|"
        r"askubuntu\.com|[a-z0-9-]+\.stackexchange\.com)/questions/\d+"
    )
    return re.search(question_pattern, url) is not None
[docs]
def is_nvd_cve_url(url: str) -> bool:
    """Return True if *url* contains an NVD CVE detail-page link.

    Matches ``nvd.nist.gov/vuln/detail/CVE-<4 digits>-<4+ digits>``,
    case-insensitively. The pattern is searched anywhere in the string;
    the scheme and ``www.`` prefix are optional.

    Args:
        url: Candidate URL (or text containing one).

    Returns:
        True when such a link is found, False otherwise.
    """
    cve_pattern = re.compile(
        r"(https?://)?(www\.)?nvd\.nist\.gov/vuln/detail/CVE-\d{4}-\d{4,}",
        re.IGNORECASE,
    )
    found = cve_pattern.search(url)
    return found is not None