"""Design a new voice with ElevenLabs and manage the voice map."""
from __future__ import annotations
import base64
import logging
import os
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from tool_context import ToolContext
logger = logging.getLogger(__name__)
# Registration metadata consumed by the tool loader.
TOOL_NAME = "elevenlabs_voice_design"

TOOL_DESCRIPTION = (
    "Design a new voice with ElevenLabs or manage the voice map. "
    "Actions: 'design' (create a new voice from a description), "
    "'add' (add an existing voice ID to the map), "
    "'list' (show all available voices)."
)

# JSON-Schema style parameter spec; mirrors run()'s signature.
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "action": {
            "type": "string",
            "enum": ["design", "add", "list"],
            "description": "The voice management action.",
        },
        "description": {
            "type": "string",
            "description": "Voice description (design only, min 20 chars).",
        },
        "voice_name": {
            "type": "string",
            "description": "Name to save the voice as (design/add).",
        },
        "voice_id": {
            "type": "string",
            "description": "Existing ElevenLabs voice ID (add only).",
        },
        "text": {
            "type": "string",
            "description": (
                "Preview text for design (min 100 chars, "
                "or omit for auto-generated)."
            ),
        },
        "model_id": {
            "type": "string",
            "description": "ElevenLabs model. Default 'eleven_multilingual_ttv_v2'.",
        },
        "guidance_scale": {
            "type": "number",
            "description": (
                "How closely AI follows prompt. Lower = more "
                "creative. Default 5.0."
            ),
        },
        "seed": {
            "type": "integer",
            "description": "Random seed for reproducibility.",
        },
    },
    "required": ["action"],
}
async def run(
    action: str,
    description: str | None = None,
    voice_name: str | None = None,
    voice_id: str | None = None,
    text: str | None = None,
    model_id: str = "eleven_multilingual_ttv_v2",
    guidance_scale: float = 5.0,
    seed: int | None = None,
    ctx: ToolContext | None = None,
) -> str:
    """Design a new ElevenLabs voice or manage the shared voice map.

    Args:
        action (str): One of 'design', 'add', or 'list'.
        description (str | None): Voice description (design only; must
            be at least 20 characters).
        voice_name (str | None): Name to store the voice under
            (design/add).
        voice_id (str | None): Existing ElevenLabs voice ID (add only).
        text (str | None): Preview text for design. Values shorter than
            100 characters are ignored and auto-generated text is used
            instead.
        model_id (str): ElevenLabs text-to-voice model ID.
        guidance_scale (float): How closely the AI follows the prompt;
            lower is more creative.
        seed (int | None): Random seed for reproducible generation.
        ctx (ToolContext | None): Tool execution context providing the
            platform adapter, Redis client, and user/channel IDs.

    Returns:
        str: Human-readable success text, or an "Error: ..." message.
    """
    # Shared mutable voice map owned by the TTS tool; imported lazily
    # (inside the function) to avoid a circular import at load time.
    from tools.elevenlabs_tts import _voice_map

    # --- LIST ---
    if action == "list":
        if not _voice_map:
            return "No voices in the voice map."
        lines = ["**Available voices:**"]
        for name, vid in sorted(_voice_map.items()):
            lines.append(f"  {name}: {vid}")
        return "\n".join(lines)

    # --- ADD ---
    if action == "add":
        if not voice_name or not voice_id:
            return (
                "Error: 'voice_name' and 'voice_id' are "
                "required for add."
            )
        if voice_name in _voice_map:
            return (
                f"Error: Voice '{voice_name}' already exists. "
                f"Choose a different name."
            )
        _voice_map[voice_name] = voice_id
        return (
            f"Added voice '{voice_name}' with ID "
            f"'{voice_id}' to the voice map."
        )

    # --- DESIGN ---
    if action == "design":
        if not voice_name:
            return "Error: 'voice_name' is required for design."
        if not description or len(description) < 20:
            return (
                "Error: 'description' is required and must be "
                "at least 20 characters."
            )
        if voice_name in _voice_map:
            return (
                f"Error: Voice '{voice_name}' already exists."
            )
        # Prefer the user's stored key; fall back to the process env.
        api_key = None
        if ctx and ctx.redis and ctx.user_id:
            from tools.manage_api_keys import get_user_api_key
            api_key = await get_user_api_key(
                ctx.user_id, "elevenlabs",
                redis_client=ctx.redis, channel_id=ctx.channel_id,
                config=getattr(ctx, "config", None),
            )
        api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
        if not api_key:
            from tools.manage_api_keys import missing_api_key_error
            return missing_api_key_error("elevenlabs")
        if ctx is None or ctx.adapter is None:
            return "Error: No platform adapter available."
        try:
            from elevenlabs.client import AsyncElevenLabs
            el = AsyncElevenLabs(api_key=api_key)
            # ElevenLabs needs >= 100 chars of preview text; shorter
            # input silently falls back to auto-generated text.
            effective_text = text
            auto_gen = not text
            if text and len(text) < 100:
                effective_text = None
                auto_gen = True
            kwargs: dict = {
                "voice_description": description,
                "text": effective_text,
                "model_id": model_id,
                "guidance_scale": guidance_scale,
                "auto_generate_text": auto_gen,
            }
            if seed is not None:
                kwargs["seed"] = seed
            response = await el.text_to_voice.design(**kwargs)
            if not response.previews:
                return "Error: No voice previews generated."
            # Send every preview clip to the channel before touching
            # the map, so a send failure cannot leave a dangling entry.
            preview_ids = []
            for preview in response.previews:
                audio = base64.b64decode(preview.audio_base_64)
                fname = (
                    f"preview_{preview.generated_voice_id}.mp3"
                )
                await ctx.adapter.send_file(
                    ctx.channel_id, audio, fname, "audio/mpeg",
                )
                preview_ids.append(preview.generated_voice_id)
            # Register only after previews were delivered successfully;
            # the first preview's generated ID becomes the mapped voice.
            first_id = response.previews[0].generated_voice_id
            _voice_map[voice_name] = first_id
            ids = ", ".join(f"`{p}`" for p in preview_ids)
            return (
                f"Generated {len(preview_ids)} voice previews "
                f"(IDs: {ids}). Added '{voice_name}' with ID "
                f"'{first_id}' to the voice map."
            )
        except Exception as exc:
            logger.error(
                "Voice design error: %s", exc, exc_info=True,
            )
            return f"Error designing voice: {exc}"

    return f"Error: Unknown action '{action}'."