# Source code for tools.search_tools

"""Search the tool registry by keyword or semantic (vector) similarity.

Keyword mode performs a fast case-insensitive substring match over tool names
and descriptions.  Semantic mode embeds the query with Gemini via
``OpenRouterEmbeddings`` and runs a RediSearch KNN query against the
pre-computed tool embedding index, falling back to keyword search if the
index is unavailable.
"""

from __future__ import annotations

import logging

import jsonutil as json

logger = logging.getLogger(__name__)

# Name under which this tool is exposed.
TOOL_NAME = "search_tools"

# Summary of what the tool does and how the two modes differ.
TOOL_DESCRIPTION = "".join(
    [
        "Search the tool registry for tools that match a query. ",
        "Use mode='semantic' to find tools by meaning (vector similarity), ",
        "or mode='keyword' for a fast substring match on tool names and descriptions. ",
        "Returns tool names, descriptions, and optionally their parameter schemas. ",
        "Semantic mode falls back to keyword search if the embedding index is unavailable.",
    ]
)

# Per-argument schemas, kept separate so each one can be read on its own.
_QUERY_SCHEMA = {
    "type": "string",
    "description": (
        "What you are looking for. For semantic mode describe the "
        "capability you need (e.g. 'send a file to a channel'). "
        "For keyword mode this is a substring matched against tool "
        "names and descriptions."
    ),
}
_MODE_SCHEMA = {
    "type": "string",
    "enum": ["semantic", "keyword"],
    "description": (
        "'semantic' uses vector similarity search (recommended for "
        "open-ended capability queries). "
        "'keyword' does a fast case-insensitive substring match."
    ),
}
_TOP_K_SCHEMA = {
    "type": "integer",
    "description": "Maximum number of results to return (default 10).",
}
_INCLUDE_PARAMETERS_SCHEMA = {
    "type": "boolean",
    "description": (
        "When true, include each tool's full parameter schema in the "
        "results (default false)."
    ),
}

# JSON-Schema style description of the arguments accepted by ``run``.
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "query": _QUERY_SCHEMA,
        "mode": _MODE_SCHEMA,
        "top_k": _TOP_K_SCHEMA,
        "include_parameters": _INCLUDE_PARAMETERS_SCHEMA,
    },
    "required": ["query", "mode"],
}

# Semantic hits whose score is below this value are discarded.
_SEMANTIC_SIMILARITY_FLOOR = 0.30


async def run(
    query: str,
    mode: str,
    top_k: int = 10,
    include_parameters: bool = False,
    ctx=None,
    **_kwargs,
) -> str:
    """Search the tool registry and return matching tool definitions.

    Semantic mode is attempted first when requested; if it reports that the
    embedding index is unavailable (by returning ``None``) the call falls
    back to keyword search and flags ``fallback`` in the response.

    Args:
        query: The search query string.
        mode: ``"semantic"`` or ``"keyword"``.  Missing or unrecognised
            values are treated as ``"keyword"``.
        top_k: Maximum number of results.  ``None`` means "use the default"
            (10); the value is clamped to the range 1..100.
        include_parameters: Whether to include parameter schemas.
        ctx: Injected tool context; must expose ``tool_registry``.
        **_kwargs: Extra arguments are accepted and ignored.

    Returns:
        JSON string with ``success``, ``mode``, ``fallback``, ``query``,
        ``result_count``, and ``results`` fields, or ``success: false`` and
        an ``error`` message on failure.  This function never raises.
    """
    try:
        # Validate the query up front so a bad argument produces a clear
        # message instead of an AttributeError from deep inside the search.
        if not isinstance(query, str):
            return json.dumps(
                {
                    "success": False,
                    "error": "query must be a string.",
                }
            )

        registry = getattr(ctx, "tool_registry", None) if ctx else None
        if registry is None:
            return json.dumps(
                {
                    "success": False,
                    "error": "Tool registry not available via ctx.",
                }
            )

        mode = (mode or "keyword").lower().strip()
        if mode not in ("semantic", "keyword"):
            mode = "keyword"

        # Callers may pass an explicit null for optional arguments; treat it
        # as "not provided" rather than failing in int(None).
        if top_k is None:
            top_k = 10
        top_k = max(1, min(int(top_k), 100))

        fallback = False
        if mode == "semantic":
            result = await _semantic_search(
                query,
                top_k,
                include_parameters,
                registry,
                ctx,
            )
            if result is not None:
                result["fallback"] = False
                return json.dumps(result, indent=2)
            # Embedding or index unavailable — fall back to keyword.
            fallback = True

        # Keyword search (also used as semantic fallback).
        results = _keyword_search(query, top_k, include_parameters, registry)
        return json.dumps(
            {
                "success": True,
                "mode": "keyword",
                "fallback": fallback,
                "query": query,
                "result_count": len(results),
                "results": results,
            },
            indent=2,
        )
    except Exception as exc:
        # Tool boundary: report the failure as JSON instead of raising.
        logger.exception("search_tools failed")
        return json.dumps({"success": False, "error": f"search_tools error: {exc}"})
# ---------------------------------------------------------------------------
# Keyword search
# ---------------------------------------------------------------------------


def _keyword_search(
    query: str,
    top_k: int,
    include_parameters: bool,
    registry,
) -> list[dict]:
    """Substring-match tools by name or description, case-insensitively.

    The keyword retrieval path for ``search_tools`` and also the fallback
    used when semantic search is unavailable.  It needs only the registry --
    no embeddings, no Redis.

    Enumerates the live registry via ``registry.list_tools``, keeps tools
    whose name or description contains the lowercased *query*, sorts the
    survivors by name for stable output, truncates to *top_k*, and
    serializes each through the shared ``_tool_dict`` helper.

    Args:
        query: The substring to match (matched case-insensitively).
        top_k: Maximum number of results to keep.
        include_parameters: Whether each result should embed the tool's
            parameter schema.
        registry: The live tool registry exposing ``list_tools``.

    Returns:
        list[dict]: Up to *top_k* serialized tool dicts, name-sorted.
    """
    kw = query.lower()
    matched = [
        t
        for t in registry.list_tools()
        if kw in t.name.lower() or (t.description and kw in t.description.lower())
    ]
    matched.sort(key=lambda t: t.name)
    return [
        _tool_dict(t, include_parameters=include_parameters)
        for t in matched[:top_k]
    ]


# ---------------------------------------------------------------------------
# Semantic search
# ---------------------------------------------------------------------------


async def _semantic_search(
    query: str,
    top_k: int,
    include_parameters: bool,
    registry,
    ctx,
) -> dict | None:
    """Run KNN search against the RediSearch tool embedding index.

    Every infrastructure failure (no Redis client, missing modules, index
    check failing or empty, embedding failure, KNN failure) is logged and
    reported as ``None`` so the caller can fall back to keyword search.

    Args:
        query: Natural-language description of the wanted capability.
        top_k: Maximum number of results to return.
        include_parameters: Whether each result should embed the tool's
            parameter schema.
        registry: The live tool registry exposing ``list_tools``.
        ctx: Tool context; its ``redis`` attribute is used as the client.

    Returns:
        A result dict (``success``, ``mode``, ``query``, ``result_count``,
        ``results``) on success, or ``None`` if the index or embeddings are
        unavailable (caller should fall back to keyword search).
    """
    redis = getattr(ctx, "redis", None) if ctx else None
    if redis is None:
        logger.warning("search_tools semantic: no Redis client in ctx, falling back")
        return None

    # Imported lazily so keyword search keeps working when these optional
    # modules are not installed.
    try:
        from classifiers.redis_vector_index import (
            knn_search_tools,
            redisearch_index_doc_count,
        )
        from init_redis_indexes import TOOL_INDEX_NAME
        from rag_system.openrouter_embeddings import OpenRouterEmbeddings
    except ImportError as exc:
        logger.warning("search_tools semantic: import failed (%s), falling back", exc)
        return None

    # Check that the RediSearch index actually has documents.  A failure here
    # (e.g. the index does not exist) is an "index unavailable" condition and
    # must trigger the fallback rather than propagate as an error.
    try:
        doc_count = await redisearch_index_doc_count(redis, TOOL_INDEX_NAME)
    except Exception as exc:
        logger.warning(
            "search_tools semantic: index check failed (%s), falling back", exc
        )
        return None
    if not doc_count or doc_count <= 0:
        logger.warning(
            "search_tools semantic: RediSearch index %r has no documents, falling back",
            TOOL_INDEX_NAME,
        )
        return None

    # Embed the query.
    try:
        embed_client = OpenRouterEmbeddings()
        query_embedding = await embed_client.embed_text(query)
    except Exception as exc:
        logger.warning(
            "search_tools semantic: embedding failed (%s), falling back", exc
        )
        return None
    if query_embedding is None or (
        hasattr(query_embedding, "size") and query_embedding.size == 0
    ):
        logger.warning("search_tools semantic: empty embedding returned, falling back")
        return None

    # Run KNN (over-fetch then threshold-filter).
    knn_k = max(top_k * 8, 64)
    try:
        raw_hits = await knn_search_tools(redis, query_embedding, knn_k=knn_k)
    except Exception as exc:
        logger.warning(
            "search_tools semantic: KNN search failed (%s), falling back", exc
        )
        return None

    # Cross-reference with the live registry to get fresh descriptions /
    # parameters.  Stale index entries (and hits without a name) are dropped
    # BEFORE truncating so they cannot use up top_k slots.
    live = {t.name: t for t in registry.list_tools()}
    candidates = [
        h
        for h in (raw_hits or [])
        if h.get("score", 0.0) >= _SEMANTIC_SIMILARITY_FLOOR
        and h.get("name") in live
    ]
    candidates.sort(key=lambda h: h["score"], reverse=True)

    results = []
    for hit in candidates[:top_k]:
        entry = _tool_dict(live[hit["name"]], include_parameters=include_parameters)
        entry["score"] = round(float(hit["score"]), 4)
        results.append(entry)

    return {
        "success": True,
        "mode": "semantic",
        "query": query,
        "result_count": len(results),
        "results": results,
    }


# ---------------------------------------------------------------------------
# Shared helper
# ---------------------------------------------------------------------------


def _tool_dict(tool_def, *, include_parameters: bool) -> dict:
    """Project a tool definition into a JSON-serializable result entry.

    Shared formatter that both search paths use to shape each result
    uniformly.  Always emits ``name`` and ``description``; the parameter
    schema is included only when asked and present, and then trimmed to
    just ``properties`` and ``required`` to keep the payload compact.

    Args:
        tool_def: A registry tool definition exposing ``name``,
            ``description`` and ``parameters``.
        include_parameters: When true, embed the (trimmed) parameter schema.

    Returns:
        dict: ``{"name": str, "description": str}`` plus an optional
        ``parameters`` block with ``properties`` and ``required``.
    """
    doc: dict = {
        "name": tool_def.name,
        "description": tool_def.description or "",
    }
    if include_parameters and tool_def.parameters:
        doc["parameters"] = {
            "properties": tool_def.parameters.get("properties", {}),
            "required": tool_def.parameters.get("required", []),
        }
    return doc