# Source code for tool_loader

"""Auto-discover and load tools from a directory of Python scripts.

Each ``.py`` file in the tools directory must expose **either**:

**Single-tool format** (one tool per file):

* ``TOOL_NAME`` -- unique name for the tool (str).
* ``TOOL_DESCRIPTION`` -- human-readable description (str).
* ``TOOL_PARAMETERS`` -- JSON Schema ``object`` for accepted args (dict).
* ``async def run(**kwargs) -> str`` -- the tool handler.

**Multi-tool format** (multiple tools per file):

* ``TOOLS`` -- a list of dicts, each with keys:
  ``name``, ``description``, ``parameters``, ``handler``.

Malformed files are logged as warnings but do **not** prevent the rest of
the tools from loading.
"""

from __future__ import annotations

import importlib.util
import logging
import os
import sys
import tempfile
from pathlib import Path

import jsonutil as json

from tools import ToolDefinition, ToolRegistry

# Module-level logger; every manifest and tool-loading diagnostic in this
# file is emitted through it.
logger = logging.getLogger(__name__)

# File name of the allow-list manifest, looked up directly inside the tools
# directory (see ``_manifest_path``).
_MANIFEST_NAME = "_manifest.json"


def _manifest_path(tools_path: Path) -> Path:
    """Locate the allow-list manifest that belongs to *tools_path*.

    The manifest lives directly inside the tools directory under the
    module-level ``_MANIFEST_NAME`` file name.  Only the path is computed
    here; the filesystem is never consulted, so the result may point at a
    file that does not exist.

    Used by :func:`load_tool_manifest_allowlist` when reading the manifest
    and by :func:`append_tool_manifest` when rewriting it.

    Args:
        tools_path: Directory containing the tool ``*.py`` scripts and the
            manifest file.

    Returns:
        Path: *tools_path* joined with ``_MANIFEST_NAME``.
    """
    manifest_file = tools_path / _MANIFEST_NAME
    return manifest_file


def load_tool_manifest_allowlist(tools_path: Path) -> set[str]:
    """Read the manifest allow-list of tool script basenames.

    Loads and parses the manifest located by ``_manifest_path`` to obtain
    the explicit set of ``*.py`` filenames that ``load_tools`` is permitted
    to import.  The allow-list is the gate that stops stray files in the
    tools directory from being executed at load time, so every failure mode
    deliberately yields an empty set (and therefore no tools load) instead
    of falling back to "load everything".

    The following problems are logged and result in an empty set rather
    than an exception:

    * the manifest file is missing;
    * the file cannot be read or decoded as JSON;
    * the top-level JSON value is not an object;
    * the ``allow`` value is present but is not a list.

    Non-string items inside ``allow`` are ignored.

    Args:
        tools_path: Directory containing the tool scripts and the manifest.

    Returns:
        set[str]: The allowed basenames (e.g. ``{"foo.py", "bar.py"}``);
        empty if the manifest is missing, unreadable, malformed, or has no
        ``allow`` list.
    """
    mp = _manifest_path(tools_path)
    if not mp.is_file():
        logger.error(
            "tools manifest missing (%s) — no tools will load; "
            "restore or regenerate tools/_manifest.json",
            mp,
        )
        return set()

    try:
        data = json.loads(mp.read_text(encoding="utf-8"))
    except Exception:
        logger.exception("Failed to read tools manifest %s", mp)
        return set()

    # Valid JSON is not necessarily an object: a top-level list, string or
    # number has no ``.get`` and must not escape as AttributeError.
    if not isinstance(data, dict):
        logger.error(
            "tools manifest %s must contain a JSON object, got %s",
            mp,
            type(data).__name__,
        )
        return set()

    allow = data.get("allow") or []
    # Iterating a bare string would allow-list its individual characters and
    # a number would raise TypeError, so insist on a real list.
    if not isinstance(allow, (list, tuple)):
        logger.error(
            "tools manifest %s: 'allow' must be a list, got %s",
            mp,
            type(allow).__name__,
        )
        return set()

    return {str(x) for x in allow if isinstance(x, str)}
def append_tool_manifest(
    tool_basename: str,
    tools_dir: str | Path = "tools",
) -> None:
    """Add a tool basename to the manifest allow-list via an atomic rewrite.

    This is the write counterpart of ``load_tool_manifest_allowlist``: it
    makes a newly written tool script eligible for loading on the next
    ``load_tools`` scan.

    The current allow-list is read back through
    ``load_tool_manifest_allowlist``, merged with *tool_basename*, sorted,
    and serialised as ``{"allow": [...]}`` (only that key is written).  The
    JSON is first written to a temporary file created in the same directory
    and then moved over the manifest with ``os.replace``, so readers never
    observe a partially written file.  If writing or replacing fails, the
    temporary file is removed (best effort) and the error is re-raised.

    Args:
        tool_basename: The ``*.py`` filename to allow (e.g. ``"foo.py"``).
        tools_dir: Directory holding the tool scripts and the manifest;
            defaults to ``"tools"``.

    Raises:
        FileNotFoundError: If the manifest file does not already exist.
    """
    directory = Path(tools_dir)
    manifest = _manifest_path(directory)
    if not manifest.is_file():
        raise FileNotFoundError(f"tools manifest not found: {manifest}")

    existing = load_tool_manifest_allowlist(directory)
    document = {"allow": sorted({*existing, tool_basename})}
    serialized = json.dumps(document, indent=2, sort_keys=False)

    # The scratch file must live next to the manifest so that os.replace is
    # a same-filesystem rename.
    handle, scratch = tempfile.mkstemp(
        dir=str(directory),
        prefix="manifest.",
        suffix=".json",
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            out.write(serialized)
        os.replace(scratch, manifest)
    except Exception:
        # Best-effort removal of the scratch file; the original error is
        # what the caller needs to see.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
    else:
        logger.info("Updated tools manifest: added %s", tool_basename)
# Module-level attributes a single-tool script must define; ``load_tools``
# skips (with a warning) any file that lacks one of them.
_REQUIRED_ATTRS = ("TOOL_NAME", "TOOL_DESCRIPTION", "TOOL_PARAMETERS", "run")
def _register_multi_tools(
    tools_list: list,
    py_file: Path,
    registry: ToolRegistry,
) -> None:
    """Register every well-formed entry of a module's ``TOOLS`` list."""
    count = 0
    for entry in tools_list:
        if not isinstance(entry, dict):
            # A non-dict entry has no ``.get``; skip just this entry so the
            # remaining entries of the file still load.
            logger.warning(
                "Tool %s: malformed TOOLS entry %r – skip",
                py_file.name,
                "?",
            )
            continue
        t_name = entry.get("name")
        t_desc = entry.get("description")
        t_params = entry.get("parameters")
        t_handler = entry.get("handler")
        valid = (
            t_name
            and t_desc
            and t_params is not None
            and callable(t_handler)
        )
        if not valid:
            logger.warning(
                "Tool %s: malformed TOOLS entry %r – skip",
                py_file.name,
                entry.get("name", "?"),
            )
            continue
        registry._tools[t_name] = ToolDefinition(
            name=t_name,
            description=t_desc,
            parameters=t_params,
            handler=t_handler,
            no_background=bool(entry.get("no_background")),
            allow_repeat=bool(entry.get("allow_repeat")),
        )
        count += 1
    logger.info(
        "Loaded %d tool(s) from %s (multi-tool)",
        count,
        py_file.name,
    )


def _register_single_tool(
    module: object,
    py_file: Path,
    registry: ToolRegistry,
) -> None:
    """Register a single-tool module (``TOOL_NAME`` / ``run``) and its aliases."""
    missing = [a for a in _REQUIRED_ATTRS if not hasattr(module, a)]
    if missing:
        logger.warning(
            "Tool %s missing attrs: %s – skip",
            py_file.name,
            ", ".join(missing),
        )
        return

    handler = getattr(module, "run")
    if not callable(handler):
        logger.warning(
            "Tool %s: 'run' not callable – skip",
            py_file.name,
        )
        return

    tool_name: str = getattr(module, "TOOL_NAME")
    tool_desc: str = getattr(module, "TOOL_DESCRIPTION")
    tool_params: dict = getattr(module, "TOOL_PARAMETERS")
    no_bg = bool(getattr(module, "TOOL_NO_BACKGROUND", False))
    allow_repeat = bool(getattr(module, "TOOL_ALLOW_REPEAT", False))

    registry._tools[tool_name] = ToolDefinition(
        name=tool_name,
        description=tool_desc,
        parameters=tool_params,
        handler=handler,
        no_background=no_bg,
        allow_repeat=allow_repeat,
    )
    logger.info(
        "Loaded tool: %s (%s)",
        tool_name,
        py_file.name,
    )

    # --- Aliases (optional TOOL_ALIASES list) ----------------------------
    aliases = getattr(module, "TOOL_ALIASES", None)
    if aliases and isinstance(aliases, (list, tuple)):
        registered: list[str] = []
        for alias in aliases:
            if not isinstance(alias, str) or not alias.strip():
                continue
            registry._tools[alias] = ToolDefinition(
                name=alias,
                description=tool_desc,
                parameters=tool_params,
                handler=handler,
                no_background=no_bg,
                allow_repeat=allow_repeat,
            )
            registered.append(alias)
        # Report only what was actually registered: joining the raw list
        # would raise TypeError on a non-string alias and would also list
        # aliases that were skipped above.
        if registered:
            logger.info(
                "Registered aliases for %s: %s",
                tool_name,
                ", ".join(registered),
            )


def _load_tool_file(py_file: Path, registry: ToolRegistry) -> None:
    """Import one allow-listed tool script and register the tool(s) it defines."""
    module_name = f"_tool_{py_file.stem}"
    canonical = f"tools.{py_file.stem}"

    if canonical in sys.modules:
        module = sys.modules[canonical]
        sys.modules[module_name] = module  # alias only, no re-exec
    else:
        spec = importlib.util.spec_from_file_location(canonical, py_file)
        if spec is None or spec.loader is None:
            logger.warning(
                "Could not create module spec for %s",
                py_file,
            )
            return
        module = importlib.util.module_from_spec(spec)
        module.__package__ = "tools"
        sys.modules[canonical] = module
        sys.modules[module_name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Do not leave a half-initialised module behind: the branch
            # above would otherwise reuse it (without re-executing the
            # file) on every later load, even after the file is fixed.
            for key in (canonical, module_name):
                if sys.modules.get(key) is module:
                    del sys.modules[key]
            raise

    # --- Multi-tool format: TOOLS list -----------------------------------
    tools_list = getattr(module, "TOOLS", None)
    if isinstance(tools_list, list):
        _register_multi_tools(tools_list, py_file, registry)
        return

    # --- Single-tool format ----------------------------------------------
    _register_single_tool(module, py_file, registry)


def load_tools(directory: str | Path, registry: ToolRegistry) -> None:
    """Discover, import, and register every allow-listed tool script.

    Scans *directory* for ``*.py`` files (in sorted order) and turns each
    permitted file into one or more ``ToolDefinition`` entries on
    *registry*.  Both module formats are supported:

    * multi-tool: a module-level ``TOOLS`` list of dicts with ``name``,
      ``description``, ``parameters`` and ``handler`` (plus optional
      ``no_background`` / ``allow_repeat`` flags);
    * single-tool: ``TOOL_NAME`` / ``TOOL_DESCRIPTION`` /
      ``TOOL_PARAMETERS`` / ``run`` (plus optional ``TOOL_NO_BACKGROUND``,
      ``TOOL_ALLOW_REPEAT`` and ``TOOL_ALIASES``).  Aliases are only
      processed for this format.

    Loading is resilient: a malformed entry or a file that raises while
    being imported is logged and skipped so one bad tool never blocks the
    rest.  A file whose import fails is also removed from ``sys.modules``
    so that a later call can import it again once it has been fixed.

    Only files named in the manifest allow-list (read via
    ``load_tool_manifest_allowlist``) are imported; an empty allow-list
    loads nothing.  Files whose name starts with ``_`` are always ignored.
    A module already present in ``sys.modules`` as ``tools.<stem>`` is
    reused without being re-executed; otherwise the file is executed and
    stored under both ``tools.<stem>`` and ``_tool_<stem>``.

    Args:
        directory: Path to the tools directory to scan (e.g. ``"tools"``).
            A missing directory is warned about and skipped.
        registry: The :class:`ToolRegistry` to populate; its internal
            ``_tools`` mapping is mutated in place.
    """
    tools_path = Path(directory)
    if not tools_path.is_dir():
        logger.warning(
            "Tools directory does not exist: %s – skipped",
            tools_path,
        )
        return

    allowed = load_tool_manifest_allowlist(tools_path)
    if not allowed:
        logger.error(
            "Tool manifest allow-list is empty — no tools loaded from %s",
            tools_path,
        )
        return

    for py_file in sorted(tools_path.glob("*.py")):
        if py_file.name.startswith("_"):
            continue  # skip __init__.py, __pycache__ artefacts, etc.
        if py_file.name not in allowed:
            logger.warning(
                "Skipping tool file not in manifest allow-list: %s",
                py_file.name,
            )
            continue
        try:
            _load_tool_file(py_file, registry)
        except Exception:
            logger.exception(
                "Failed to load tool from %s",
                py_file.name,
            )

    logger.info(
        "Tool loading complete – %d tool(s) registered",
        len(registry),
    )