# Source code for tools.pollinations_tts

"""Text-to-speech via Pollinations.ai API, uploaded to the current channel."""

from __future__ import annotations

import asyncio
import hashlib
import logging
import os
import tempfile
from typing import TYPE_CHECKING
from urllib.parse import quote

if TYPE_CHECKING:
    from tool_context import ToolContext

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the Pollinations audio endpoint. ``run`` appends the
# URL-encoded text to it as a single path segment.
POLLINATIONS_AUDIO_BASE = "https://gen.pollinations.ai/audio"

# Supported output formats mapped to the MIME type handed to the adapter's
# ``send_file``. The keys also serve as the ``format`` query parameter and as
# the file suffix, and ``_normalize_format`` validates against this table.
_FORMAT_TO_MIME = {
    "mp3": "audio/mpeg",
    "wav": "audio/wav",
    "opus": "audio/opus",
    "aac": "audio/aac",
    "flac": "audio/flac",
    # NOTE(review): ``audio/basic`` conventionally denotes 8-bit mu-law audio
    # (RFC 2046); confirm it matches the PCM payload the API actually returns.
    "pcm": "audio/basic",
}

# Tool metadata: name, human-readable description, and a JSON-Schema-style
# description of the arguments accepted by ``run``.
# NOTE(review): presumably consumed by the tool loader -- the consumer is not
# visible in this module.
TOOL_NAME = "pollinations_tts"
TOOL_DESCRIPTION = (
    "Convert text to speech using the Pollinations.ai audio API "
    "(GET https://gen.pollinations.ai/audio/{text} with voice and format "
    "query parameters), save to a temporary audio file, and upload the "
    "result to the current channel."
)
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "text": {
            "type": "string",
            "description": "The text to synthesize as speech (max ~4096 chars).",
        },
        "voice": {
            "type": "string",
            "description": (
                "Voice preset (e.g. alloy, echo, fable, onyx, nova, shimmer, "
                "rachel, adam). Default: nova."
            ),
        },
        "output_format": {
            "type": "string",
            "description": (
                "Audio container: mp3 (default), wav, opus, aac, flac, or pcm."
            ),
        },
        "speed": {
            "type": "number",
            "description": (
                "Optional speech speed (0.25–4.0) when supported by the API."
            ),
        },
    },
    # Only ``text`` is mandatory; ``run`` supplies defaults for the rest.
    "required": ["text"],
}


def _normalize_voice(voice: str) -> str:
    """Coerce a caller-supplied voice name into a lowercase preset string.

    Surrounding whitespace is trimmed and the value is lowercased. No
    allow-list is enforced: any non-empty string is returned as-is (after
    normalization) and ends up as the ``voice`` query parameter built by
    :func:`run`.

    Tool arguments originate from a model and are not guaranteed to honour
    the declared type, so anything that is not a ``str`` (``None``, numbers,
    lists, ...) is treated like a missing value instead of raising
    ``AttributeError`` on ``.strip()``.

    Args:
        voice: The raw voice preset (e.g. ``"alloy"``, ``" Nova "``);
            may be empty, ``None``, or a non-string value.

    Returns:
        The trimmed, lowercased voice name, or ``"nova"`` when the input is
        not a string or is empty / whitespace-only.
    """
    if not isinstance(voice, str):
        return "nova"
    cleaned = voice.strip().lower()
    # An empty result means the input was "" or whitespace-only.
    return cleaned or "nova"


def _normalize_format(fmt: str) -> str:
    """Validate and canonicalize the requested audio format.

    The input is trimmed, lowercased, and stripped of leading dots, then
    checked against :data:`_FORMAT_TO_MIME`. Anything not present in that
    table falls back to ``"mp3"``, so the result is always a valid key of
    :data:`_FORMAT_TO_MIME`.

    Tool arguments originate from a model and are not guaranteed to honour
    the declared type, so a non-string value (``None``, numbers, ...) is
    treated like an unrecognized format instead of raising
    ``AttributeError`` on ``.strip()``.

    Args:
        fmt: The raw output format (e.g. ``"mp3"``, ``".wav"``, ``"OPUS"``);
            may be empty, ``None``, or a non-string value.

    Returns:
        A key of :data:`_FORMAT_TO_MIME`; ``"mp3"`` for unrecognized input.
    """
    if not isinstance(fmt, str):
        return "mp3"
    candidate = fmt.strip().lower().lstrip(".")
    return candidate if candidate in _FORMAT_TO_MIME else "mp3"


def _write_temp_read_unlink(data: bytes, suffix: str) -> bytes:
    """Stage *data* in a temporary file, read it back, and delete the file.

    A temp file is created with ``tempfile.mkstemp``, the payload is written
    through the returned descriptor, and the file is then reopened by path
    and read in full. The file is removed in a ``finally`` clause, so it is
    deleted whether or not the write/read succeeded.

    All I/O here is synchronous; :func:`run` calls this function through
    ``asyncio.to_thread``.

    Args:
        data: The bytes to stage.
        suffix: Suffix for the temp file name (e.g. ``".mp3"``).

    Returns:
        The bytes read back from the temp file.
    """
    handle, tmp_path = tempfile.mkstemp(suffix=suffix)
    try:
        # os.fdopen wraps the descriptor from mkstemp; leaving the ``with``
        # block closes it before the file is reopened by path.
        with os.fdopen(handle, "wb") as writer:
            writer.write(data)
        with open(tmp_path, "rb") as reader:
            payload = reader.read()
    finally:
        try:
            os.unlink(tmp_path)
        except OSError:
            # Best-effort cleanup: a failed delete must not mask the result
            # or an exception raised above.
            pass
    return payload



# [docs]
async def run(
    text: str,
    voice: str = "nova",
    output_format: str = "mp3",
    speed: float | None = None,
    ctx: ToolContext | None = None,
) -> str:
    """Synthesize speech via the Pollinations audio API and upload it.

    Flow:

    1. Reject a missing context/adapter, empty text, or text longer than
       4096 characters.
    2. Resolve the Pollinations API key with
       ``tools.pollinate._resolve_api_key``; when none is available, return
       ``tools.manage_api_keys.missing_api_key_error("pollinations")``.
    3. Normalize the format and voice via :func:`_normalize_format` and
       :func:`_normalize_voice`, and build the query parameters.
    4. GET ``POLLINATIONS_AUDIO_BASE/<url-encoded text>`` through the
       ``tools._safe_http`` helpers with a bearer ``Authorization`` header.
    5. Stage the response bytes off the event loop with
       :func:`_write_temp_read_unlink`, upload them with
       ``ctx.adapter.send_file`` to ``ctx.channel_id``, and append a record
       (data, filename, mimetype, file URL) to ``ctx.sent_files``.

    Args:
        text: The text to synthesize; trimmed, required, at most 4096
            characters.
        voice: Voice preset name; normalized, default ``"nova"``.
        output_format: Audio format; normalized, default ``"mp3"``.
        speed: Optional speech speed. It is sent only when it converts to a
            float within ``0.25``-``4.0``; out-of-range or non-numeric
            values are ignored.
        ctx: The tool execution context. Must be non-``None`` and expose a
            non-``None`` ``adapter``; ``channel_id`` and ``sent_files`` are
            used for the upload.

    Returns:
        A success message (including the file URL when ``send_file`` returns
        one), or an error string when the adapter or API key is missing, the
        text is empty or too long, the API answers with a non-200 status or
        an empty body, or an exception is raised during the request, staging,
        or upload.
    """
    # Cheap argument guards come first so they need no project imports.
    if ctx is None or ctx.adapter is None:
        return "Error: No platform adapter available."

    text = (text or "").strip()
    if not text:
        return "Error: text is empty."

    if len(text) > 4096:
        return "Error: text exceeds 4096 characters."

    # Project helpers are imported lazily, like missing_api_key_error below.
    from tools._safe_http import safe_http_request, safe_httpx_client
    from tools.pollinate import _resolve_api_key

    api_key, _own = await _resolve_api_key(ctx)
    if not api_key:
        from tools.manage_api_keys import missing_api_key_error

        return missing_api_key_error("pollinations")

    fmt = _normalize_format(output_format)
    voice_n = _normalize_voice(voice)
    mime = _FORMAT_TO_MIME[fmt]
    suffix = f".{fmt}"

    # safe="" also percent-encodes "/", keeping the text a single path segment.
    encoded = quote(text, safe="")
    url = f"{POLLINATIONS_AUDIO_BASE}/{encoded}"
    params: dict[str, str | float] = {"voice": voice_n, "format": fmt}
    if speed is not None:
        # Tool arguments come from a model and may not be numeric; an
        # unusable speed is ignored exactly like an out-of-range one rather
        # than raising out of the handler.
        try:
            speed_value = float(speed)
        except (TypeError, ValueError):
            logger.warning(
                "Pollinations TTS ignoring non-numeric speed: %r", speed
            )
        else:
            # NaN fails both comparisons and is therefore dropped as well.
            if 0.25 <= speed_value <= 4.0:
                params["speed"] = speed_value

    headers = {"Authorization": f"Bearer {api_key}"}

    try:
        async with safe_httpx_client(timeout=120.0) as http:
            resp = await safe_http_request(
                http,
                "GET",
                url,
                params=params,
                headers=headers,
                max_redirects=5,
            )
        if resp.status_code != 200:
            logger.error(
                "Pollinations TTS HTTP %s: %s",
                resp.status_code,
                resp.text[:500],
            )
            return (
                f"Error: Pollinations API returned {resp.status_code}: "
                f"{resp.text[:300]}"
            )

        # An unexpected content type is only logged; the payload is still
        # uploaded under the MIME type derived from the requested format.
        ct = (resp.headers.get("content-type") or "").split(";")[0].strip().lower()
        if ct and not ct.startswith("audio/") and "octet-stream" not in ct:
            logger.warning("Pollinations TTS unexpected content-type: %s", ct)

        audio_bytes = resp.content
        if not audio_bytes:
            return "Error: empty audio response."

        # Blocking file I/O runs in a worker thread, off the event loop.
        staged = await asyncio.to_thread(
            _write_temp_read_unlink,
            audio_bytes,
            suffix,
        )

        # A short content hash makes the file name depend on the audio bytes.
        short = hashlib.sha256(staged).hexdigest()[:12]
        fname = f"pollinations_tts_{short}{suffix}"

        file_url = await ctx.adapter.send_file(
            ctx.channel_id,
            staged,
            fname,
            mime,
        )
        ctx.sent_files.append(
            {
                "data": staged,
                "filename": fname,
                "mimetype": mime,
                "file_url": file_url or "",
            }
        )
        msg = "Successfully uploaded Pollinations TTS audio to the channel."
        if file_url:
            msg += f" File URL: {file_url}"
        return msg
    except Exception as exc:
        # Failures in the request, staging, or upload are reported to the
        # caller as a string rather than raised.
        logger.error("Pollinations TTS error: %s", exc, exc_info=True)
        return f"Error generating speech: {exc}"