"""Semantic search across message history using vector embeddings."""
from __future__ import annotations
import json
import logging
from typing import Any, TYPE_CHECKING
if TYPE_CHECKING:
from tool_context import ToolContext
logger = logging.getLogger(__name__)
# Identifier under which this tool is registered/invoked.
TOOL_NAME = "semantic_search"

# One-paragraph summary of the tool's behavior (presumably surfaced to the
# model in the tool listing — confirm against the tool registry).
TOOL_DESCRIPTION = (
    "Perform a semantic search across message history using vector embeddings. "
    "Returns messages that are semantically similar to the query."
)

# JSON-Schema description of the arguments accepted by run().
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "query": {
            "type": "string",
            "description": "Search query text",
        },
        "top_k": {
            "type": "integer",
            "description": "Number of results (default 10, max 50)",
        },
        "channel_id": {
            "type": "string",
            "description": "Optional channel ID to filter results",
        },
    },
    # Only "query" is mandatory; top_k and channel_id have defaults in run().
    "required": ["query"],
}
async def run(query: str, top_k: int = 10, channel_id: str = "", *, ctx: ToolContext | None = None) -> str:
"""Execute this tool and return the result.
Args:
query (str): Search query or input string.
top_k (int): The top k value.
channel_id (str): Discord/Matrix channel identifier.
Returns:
str: Result string.
"""
top_k = max(1, min(top_k, 50))
if ctx is None or ctx.message_cache is None:
return json.dumps({"error": "Message cache not available for vector search"})
try:
mc = ctx.message_cache
results = await mc.search_messages(
query=query,
limit=top_k,
channel_id=channel_id or None,
)
return json.dumps({
"query": query,
"top_k": top_k,
"channel_id": channel_id or None,
"num_results": len(results),
"results": results,
}, default=str)
except AttributeError:
return json.dumps({
"query": query,
"num_results": 0,
"results": [],
"error": "Message cache does not support vector search",
})
except Exception as e:
logger.error("Semantic search failed: %s", e, exc_info=True)
return json.dumps({
"query": query,
"num_results": 0,
"results": [],
"error": str(e),
})