"""Design a new voice with ElevenLabs and manage the voice map."""
from __future__ import annotations
import base64
import logging
import os
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from tool_context import ToolContext
logger = logging.getLogger(__name__)
# Registration metadata consumed by the tool loader.
TOOL_NAME = "elevenlabs_voice_design"

TOOL_DESCRIPTION = (
    "Design a new voice with ElevenLabs or manage the voice map. "
    "Actions: 'design' (create a new voice from a description), "
    "'add' (add an existing voice ID to the map), "
    "'list' (show all available voices)."
)

# JSON-Schema style parameter spec; mirrors run()'s signature.
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "action": {
            "type": "string",
            "enum": ["design", "add", "list"],
            "description": "The voice management action.",
        },
        "description": {
            "type": "string",
            "description": "Voice description (design only, min 20 chars).",
        },
        "voice_name": {
            "type": "string",
            "description": "Name to save the voice as (design/add).",
        },
        "voice_id": {
            "type": "string",
            "description": "Existing ElevenLabs voice ID (add only).",
        },
        "text": {
            "type": "string",
            "description": (
                "Preview text for design (min 100 chars, "
                "or omit for auto-generated)."
            ),
        },
        "model_id": {
            "type": "string",
            "description": "ElevenLabs model. Default 'eleven_multilingual_ttv_v2'.",
        },
        "guidance_scale": {
            "type": "number",
            "description": (
                "How closely AI follows prompt. Lower = more "
                "creative. Default 5.0."
            ),
        },
        "seed": {
            "type": "integer",
            "description": "Random seed for reproducibility.",
        },
    },
    "required": ["action"],
}
async def run(
    action: str,
    description: str | None = None,
    voice_name: str | None = None,
    voice_id: str | None = None,
    text: str | None = None,
    model_id: str = "eleven_multilingual_ttv_v2",
    guidance_scale: float = 5.0,
    seed: int | None = None,
    ctx: ToolContext | None = None,
) -> str:
    """Design a new ElevenLabs voice or manage the shared voice map.

    Args:
        action (str): One of 'design', 'add', or 'list'.
        description (str | None): Voice description (design only; must
            be at least 20 characters).
        voice_name (str | None): Name to store the voice under
            (design/add).
        voice_id (str | None): Existing ElevenLabs voice ID (add only).
        text (str | None): Preview text for design. Values shorter than
            100 characters are ignored and auto-generated text is used
            instead.
        model_id (str): ElevenLabs text-to-voice model ID.
        guidance_scale (float): How closely the AI follows the prompt;
            lower is more creative.
        seed (int | None): Random seed for reproducible generation.
        ctx (ToolContext | None): Tool execution context providing the
            platform adapter, Redis client, and user/channel IDs.

    Returns:
        str: Human-readable success text, or an "Error: ..." message.
    """
    # Shared mutable voice map owned by the TTS tool; imported lazily
    # (inside the function) to avoid a circular import at load time.
    from tools.elevenlabs_tts import _voice_map

    # --- LIST ---
    if action == "list":
        if not _voice_map:
            return "No voices in the voice map."
        lines = ["**Available voices:**"]
        for name, vid in sorted(_voice_map.items()):
            lines.append(f"  {name}: {vid}")
        return "\n".join(lines)

    # --- ADD ---
    if action == "add":
        if not voice_name or not voice_id:
            return (
                "Error: 'voice_name' and 'voice_id' are "
                "required for add."
            )
        if voice_name in _voice_map:
            return (
                f"Error: Voice '{voice_name}' already exists. "
                f"Choose a different name."
            )
        _voice_map[voice_name] = voice_id
        return (
            f"Added voice '{voice_name}' with ID "
            f"'{voice_id}' to the voice map."
        )

    # --- DESIGN ---
    if action == "design":
        if not voice_name:
            return "Error: 'voice_name' is required for design."
        if not description or len(description) < 20:
            return (
                "Error: 'description' is required and must be "
                "at least 20 characters."
            )
        if voice_name in _voice_map:
            return (
                f"Error: Voice '{voice_name}' already exists."
            )
        # Prefer the user's stored key; fall back to the process env.
        api_key = None
        if ctx and ctx.redis and ctx.user_id:
            from tools.manage_api_keys import get_user_api_key
            api_key = await get_user_api_key(
                ctx.user_id, "elevenlabs",
                redis_client=ctx.redis, channel_id=ctx.channel_id,
                config=getattr(ctx, "config", None),
            )
        api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
        if not api_key:
            from tools.manage_api_keys import missing_api_key_error
            return missing_api_key_error("elevenlabs")
        if ctx is None or ctx.adapter is None:
            return "Error: No platform adapter available."
        try:
            from elevenlabs.client import AsyncElevenLabs
            el = AsyncElevenLabs(api_key=api_key)
            # ElevenLabs needs >= 100 chars of preview text; shorter
            # input silently falls back to auto-generated text.
            effective_text = text
            auto_gen = not text
            if text and len(text) < 100:
                effective_text = None
                auto_gen = True
            kwargs: dict = {
                "voice_description": description,
                "text": effective_text,
                "model_id": model_id,
                "guidance_scale": guidance_scale,
                "auto_generate_text": auto_gen,
            }
            if seed is not None:
                kwargs["seed"] = seed
            response = await el.text_to_voice.design(**kwargs)
            if not response.previews:
                return "Error: No voice previews generated."
            # Send every preview clip to the channel before touching
            # the map, so a send failure cannot leave a dangling entry.
            preview_ids = []
            for preview in response.previews:
                audio = base64.b64decode(preview.audio_base_64)
                fname = (
                    f"preview_{preview.generated_voice_id}.mp3"
                )
                await ctx.adapter.send_file(
                    ctx.channel_id, audio, fname, "audio/mpeg",
                )
                preview_ids.append(preview.generated_voice_id)
            # Register only after previews were delivered successfully;
            # the first preview's generated ID becomes the mapped voice.
            first_id = response.previews[0].generated_voice_id
            _voice_map[voice_name] = first_id
            ids = ", ".join(f"`{p}`" for p in preview_ids)
            return (
                f"Generated {len(preview_ids)} voice previews "
                f"(IDs: {ids}). Added '{voice_name}' with ID "
                f"'{first_id}' to the voice map."
            )
        except Exception as exc:
            logger.error(
                "Voice design error: %s", exc, exc_info=True,
            )
            return f"Error designing voice: {exc}"

    return f"Error: Unknown action '{action}'."