"""Text-to-speech via ElevenLabs API, uploaded to the current channel."""
from __future__ import annotations
import logging
import os
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from tool_context import ToolContext
logger = logging.getLogger(__name__)
# Built-in voice-name -> ElevenLabs voice-ID table. It is a plain in-module
# literal, so looking up a voice needs no Redis access.
_DEFAULT_VOICE_MAP: dict[str, str] = {
    "Rachel": "21m00Tcm4TlvDq8ikWAM",
    "Domi": "AZnzlk1XvdvUeBnXmlld",
    "Bella": "EXAVITQu4vr4xnSDxMaL",
    # NOTE(review): this ID is 21 characters long while every other entry
    # in this table is 20 -- verify it against the ElevenLabs voice library.
    "Antoni": "ErXwobaYiN019P7ENGINE",
    "Elli": "MF3mGyEYCl7XYWbV9V6O",
    # NOTE(review): identical to the "Rachel" ID above -- confirm whether
    # this is an intentional alias or a placeholder.
    "Freyja": "21m00Tcm4TlvDq8ikWAM",
    "Josh": "TxGEqnHWrfWFTfGW9XjX",
    "Arnold": "VR6AewLTigWG4xSOh_pT",
    "Adam": "pNInz6obpgDQGcFmaJgB",
    "Sam": "yoZ06aMkjVJeG3kfOaBe",
    "Stargazer": "vziGpJpdnYN1FuXssn3c",
}

# Working copy that may be extended at runtime (e.g. by voice_design)
# without mutating the defaults above.
_voice_map: dict[str, str] = _DEFAULT_VOICE_MAP.copy()
# Tool metadata: the tool's identifier and its human-readable summary.
TOOL_NAME = "elevenlabs_tts"
TOOL_DESCRIPTION = (
    "Convert text to speech using ElevenLabs and upload the audio to the "
    "current channel. Supports audio tags like [happy], [whisper], "
    "[laughing], [sighs] for emotional delivery."
)
# JSON-Schema-style description of the arguments this tool accepts.
# Only "text" is required; the remaining properties are optional.
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "text": {
            "type": "string",
            "description": (
                "The text to convert to speech. Supports v3 audio tags in "
                "brackets like [happy], [whisper], [laughing], [sighs], "
                "[short pause], etc."
            ),
        },
        "voice_name": {
            "type": "string",
            "description": (
                "Voice to use. Available: Rachel, Domi, Bella, Antoni, Elli, "
                "Freyja, Josh, Arnold, Adam, Sam, Stargazer. Default: Stargazer."
            ),
        },
        "stability": {
            "type": "number",
            "description": "Voice stability 0.0-1.0. Default 0.75.",
        },
        "similarity_boost": {
            "type": "number",
            "description": "Similarity boost 0.0-1.0. Default 0.75.",
        },
        "model_id": {
            "type": "string",
            "description": "ElevenLabs model. Default 'eleven_v3'.",
        },
    },
    "required": ["text"],
}
[docs]
async def run(
    text: str,
    voice_name: str = "Stargazer",
    stability: float = 0.75,
    similarity_boost: float = 0.75,
    model_id: str = "eleven_v3",
    ctx: ToolContext | None = None,
) -> str:
    """Synthesize *text* with ElevenLabs and upload the audio to the channel.

    The API key is looked up per user via ``get_user_api_key`` when the
    context provides Redis and a user id, and otherwise taken from the
    ``ELEVENLABS_API_KEY`` environment variable. The generated audio is sent
    through ``ctx.adapter.send_file`` as ``voice.mp3`` and also recorded in
    ``ctx.sent_files``.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected
            before any API call is made.
        voice_name: Key into the module's voice map. An exact match is tried
            first, then a case-insensitive one; unknown names fall back to
            the "Stargazer" voice.
        stability: Passed to ``VoiceSettings(stability=...)``.
        similarity_boost: Passed to ``VoiceSettings(similarity_boost=...)``.
        model_id: ElevenLabs model identifier passed to the convert call.
        ctx: Tool execution context; must provide ``adapter``, ``channel_id``
            and ``sent_files`` for the upload to happen.

    Returns:
        A success message (including the file URL when the adapter returns
        one) or a human-readable error string. Failures during synthesis or
        upload are logged and reported in the return value, not raised.
    """
    # Validate first so an empty request never costs an API round-trip.
    if not text or not str(text).strip():
        return "Error: No text provided for speech synthesis."

    # Function-scope imports: the SDK is only loaded when the tool runs.
    from elevenlabs import VoiceSettings
    from elevenlabs.client import AsyncElevenLabs

    api_key = None
    if ctx and ctx.redis and ctx.user_id:
        from tools.manage_api_keys import get_user_api_key

        api_key = await get_user_api_key(
            ctx.user_id,
            "elevenlabs",
            redis_client=ctx.redis,
            channel_id=ctx.channel_id,
            config=getattr(ctx, "config", None),
        )
    # A per-user key takes precedence over the process-wide environment key.
    api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        from tools.manage_api_keys import missing_api_key_error

        return missing_api_key_error("elevenlabs")

    if ctx is None or ctx.adapter is None:
        return "Error: No platform adapter available."

    voice_id = _resolve_voice_id(voice_name)

    try:
        el_client = AsyncElevenLabs(api_key=api_key)
        audio_stream = el_client.text_to_speech.convert(
            voice_id=voice_id,
            text=text,
            model_id=model_id,
            voice_settings=VoiceSettings(
                stability=stability,
                similarity_boost=similarity_boost,
            ),
        )
        # Collect the chunks and join once; repeated ``bytes +=`` copies the
        # whole buffer on every iteration.
        audio_bytes = b"".join([chunk async for chunk in audio_stream])
        if not audio_bytes:
            # Do not upload a zero-byte file and then report success.
            return "Error generating speech: ElevenLabs returned no audio data."

        file_url = await ctx.adapter.send_file(
            ctx.channel_id, audio_bytes, "voice.mp3", "audio/mpeg",
        )
        ctx.sent_files.append({
            "data": audio_bytes,
            "filename": "voice.mp3",
            "mimetype": "audio/mpeg",
            "file_url": file_url or "",
        })
    except Exception as exc:
        # Tool boundary: report the failure to the caller instead of raising.
        logger.exception("ElevenLabs TTS error: %s", exc)
        return f"Error generating speech: {exc}"

    msg = "Successfully uploaded TTS audio to the channel."
    if file_url:
        msg += f" File URL: {file_url}"
    return msg


def _resolve_voice_id(voice_name: str) -> str | None:
    """Return the voice ID for *voice_name*, defaulting to the Stargazer voice."""
    exact = _voice_map.get(voice_name)
    if exact is not None:
        return exact

    # Tolerate differences in capitalisation / surrounding whitespace before
    # giving up on the requested voice.
    wanted = str(voice_name).strip().casefold()
    for known_name, known_id in _voice_map.items():
        if known_name.casefold() == wanted:
            return known_id

    logger.warning(
        "Unknown ElevenLabs voice %r; falling back to 'Stargazer'.", voice_name,
    )
    return _voice_map.get("Stargazer")