# Source code for tools.elevenlabs_tts

"""Text-to-speech via ElevenLabs API, uploaded to the current channel."""

from __future__ import annotations

import logging
import os
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from tool_context import ToolContext

logger = logging.getLogger(__name__)

# Default voice map (no Redis dependency): voice display name -> ElevenLabs
# voice ID.
_DEFAULT_VOICE_MAP: dict[str, str] = {
    "Rachel": "21m00Tcm4TlvDq8ikWAM",
    "Domi": "AZnzlk1XvdvUeBnXmlld",
    "Bella": "EXAVITQu4vr4xnSDxMaL",
    # Fixed: the previous value ("ErXwobaYiN019P7ENGINE") was a malformed
    # 21-character ID; every other entry here is 20 characters long.
    "Antoni": "ErXwobaYiN019PkySvjV",
    "Elli": "MF3mGyEYCl7XYWbV9V6O",
    # NOTE(review): "Freyja" maps to the same voice ID as "Rachel" --
    # confirm whether this alias is intentional.
    "Freyja": "21m00Tcm4TlvDq8ikWAM",
    "Josh": "TxGEqnHWrfWFTfGW9XjX",
    "Arnold": "VR6AewLTigWG4xSOh_pT",
    "Adam": "pNInz6obpgDQGcFmaJgB",
    "Sam": "yoZ06aMkjVJeG3kfOaBe",
    "Stargazer": "vziGpJpdnYN1FuXssn3c",
}

# Module-level mutable copy so voice_design can add to it without mutating
# the defaults above.
_voice_map: dict[str, str] = dict(_DEFAULT_VOICE_MAP)

# Tool metadata constants: the tool's identifier, its human-readable
# description, and a JSON-Schema-style description of the arguments
# accepted by ``run``.
TOOL_NAME = "elevenlabs_tts"

TOOL_DESCRIPTION = (
    "Convert text to speech using ElevenLabs "
    "and upload the audio to the current channel. "
    "Supports audio tags like [happy], [whisper], [laughing], [sighs] "
    "for emotional delivery."
)

TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        # Only ``text`` is mandatory; every other property is optional.
        "text": {
            "type": "string",
            "description": (
                "The text to convert to speech. "
                "Supports v3 audio tags in brackets like "
                "[happy], [whisper], [laughing], [sighs], [short pause], etc."
            ),
        },
        "voice_name": {
            "type": "string",
            "description": (
                "Voice to use. "
                "Available: Rachel, Domi, Bella, Antoni, Elli, Freyja, "
                "Josh, Arnold, Adam, Sam, Stargazer. "
                "Default: Stargazer."
            ),
        },
        "stability": {
            "type": "number",
            "description": "Voice stability 0.0-1.0. Default 0.75.",
        },
        "similarity_boost": {
            "type": "number",
            "description": "Similarity boost 0.0-1.0. Default 0.75.",
        },
        "model_id": {
            "type": "string",
            "description": "ElevenLabs model. Default 'eleven_v3'.",
        },
    },
    "required": ["text"],
}



# [docs] (documentation link marker left over from the rendered source page)
async def run(
    text: str,
    voice_name: str = "Stargazer",
    stability: float = 0.75,
    similarity_boost: float = 0.75,
    model_id: str = "eleven_v3",
    ctx: ToolContext | None = None,
) -> str:
    """Synthesize ``text`` with ElevenLabs and upload the audio to the channel.

    The API key is taken from the calling user's stored key (looked up via
    ``get_user_api_key`` when ``ctx`` provides ``redis`` and ``user_id``),
    falling back to the ``ELEVENLABS_API_KEY`` environment variable. The
    generated audio is sent through ``ctx.adapter.send_file`` as
    ``voice.mp3`` and also recorded in ``ctx.sent_files``.

    Args:
        text (str): Text to speak. Must contain at least one
            non-whitespace character.
        voice_name (str): Key into the module's voice map. Unknown names
            fall back to the "Stargazer" voice.
        stability (float): Passed to ``VoiceSettings.stability``.
        similarity_boost (float): Passed to
            ``VoiceSettings.similarity_boost``.
        model_id (str): ElevenLabs model identifier.
        ctx (ToolContext | None): Tool execution context; must provide an
            ``adapter`` for the upload to be possible.

    Returns:
        str: A success message (including the file URL when the adapter
        returns one), or a human-readable error string. API and upload
        failures are caught and reported as an error string, not raised.
    """
    # Reject empty input up front so no key lookup or API call is made
    # for a request that cannot produce audio.
    if not isinstance(text, str) or not text.strip():
        return "Error: No text provided for speech synthesis."

    # Imported lazily, inside the function, as in the original code.
    from elevenlabs import VoiceSettings
    from elevenlabs.client import AsyncElevenLabs

    # Prefer the user's own key; fall back to the process-wide env var.
    api_key = None
    if ctx and ctx.redis and ctx.user_id:
        from tools.manage_api_keys import get_user_api_key
        api_key = await get_user_api_key(
            ctx.user_id, "elevenlabs",
            redis_client=ctx.redis, channel_id=ctx.channel_id,
            config=getattr(ctx, "config", None),
        )
    api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        from tools.manage_api_keys import missing_api_key_error
        return missing_api_key_error("elevenlabs")

    if ctx is None or ctx.adapter is None:
        return "Error: No platform adapter available."

    voice_id = _voice_map.get(voice_name)
    if voice_id is None:
        # Same fallback as before, but no longer silent.
        logger.warning(
            "Unknown ElevenLabs voice %r; falling back to 'Stargazer'.",
            voice_name,
        )
        voice_id = _voice_map.get("Stargazer")

    try:
        el_client = AsyncElevenLabs(api_key=api_key)

        audio_gen = el_client.text_to_speech.convert(
            voice_id=voice_id,
            text=text,
            model_id=model_id,
            voice_settings=VoiceSettings(
                stability=stability,
                similarity_boost=similarity_boost,
            ),
        )

        # Collect the streamed chunks and join once; repeated ``bytes +=``
        # copies the whole buffer on every chunk.
        audio_bytes = b"".join([chunk async for chunk in audio_gen])
        if not audio_bytes:
            # Do not upload a zero-byte "voice.mp3".
            return "Error generating speech: ElevenLabs returned no audio data."

        file_url = await ctx.adapter.send_file(
            ctx.channel_id, audio_bytes, "voice.mp3", "audio/mpeg",
        )
        ctx.sent_files.append({
            "data": audio_bytes,
            "filename": "voice.mp3",
            "mimetype": "audio/mpeg",
            "file_url": file_url or "",
        })
        msg = "Successfully uploaded TTS audio to the channel."
        if file_url:
            msg += f" File URL: {file_url}"
        return msg

    except Exception as exc:
        # Tool boundary: report the failure to the caller as a string
        # while keeping the full traceback in the logs.
        logger.exception("ElevenLabs TTS error: %s", exc)
        return f"Error generating speech: {exc}"