# Source code for tool_loader
"""Auto-discover and load tools from a directory of Python scripts.
Each ``.py`` file in the tools directory must expose **either**:
**Single-tool format** (one tool per file):
* ``TOOL_NAME`` -- unique name for the tool (str).
* ``TOOL_DESCRIPTION`` -- human-readable description (str).
* ``TOOL_PARAMETERS`` -- JSON Schema ``object`` for accepted args (dict).
* ``async def run(**kwargs) -> str`` -- the tool handler.
**Multi-tool format** (multiple tools per file):
* ``TOOLS`` -- a list of dicts, each with keys:
``name``, ``description``, ``parameters``, ``handler``.
Malformed files are logged as warnings but do **not** prevent the rest of
the tools from loading.
"""
from __future__ import annotations
import importlib.util
import logging
import os
import sys
import tempfile
from pathlib import Path
import jsonutil as json
from tools import ToolDefinition, ToolRegistry
# Module-level logger; every load, skip, and failure in this module is
# reported through it.
logger = logging.getLogger(__name__)
# Basename of the allow-list manifest file looked up inside the tools directory.
_MANIFEST_NAME = "_manifest.json"
def _manifest_path(tools_path: Path) -> Path:
    """Build the location of the allow-list manifest for a tools directory.

    Appends the module-level ``_MANIFEST_NAME`` (``"_manifest.json"``) to
    *tools_path*. This is pure path arithmetic: nothing is read from or
    written to disk, so the returned path may or may not exist.

    Args:
        tools_path: Directory holding the tool ``*.py`` scripts and the
            manifest file (e.g. ``Path("tools")``).

    Returns:
        Path: ``tools_path`` joined with ``_MANIFEST_NAME``.
    """
    return tools_path.joinpath(_MANIFEST_NAME)
# [docs]
def load_tool_manifest_allowlist(tools_path: Path) -> set[str]:
    """Read the manifest allow-list of tool script basenames.

    Loads and JSON-decodes the manifest located by ``_manifest_path`` to
    obtain the explicit set of ``*.py`` filenames that may be imported.
    The allow-list is a security gate, so this function fails closed: a
    missing, unreadable, or malformed manifest yields an empty set (and
    therefore nothing is allowed) instead of falling back to "load
    everything". Problems are logged; no exception is propagated for bad
    manifest content.

    Expected manifest shape::

        {"allow": ["foo.py", "bar.py"]}

    Args:
        tools_path: Directory containing the tool scripts and the manifest.

    Returns:
        set[str]: The allowed basenames (e.g. ``{"foo.py", "bar.py"}``).
        Empty if the manifest is missing, cannot be read or parsed, is not
        a JSON object, or has no usable ``allow`` list. Non-string items
        inside ``allow`` are ignored.
    """
    mp = _manifest_path(tools_path)
    if not mp.is_file():
        logger.error(
            "tools manifest missing (%s) — no tools will load; "
            "restore or regenerate tools/_manifest.json",
            mp,
        )
        return set()

    try:
        data = json.loads(mp.read_text(encoding="utf-8"))
    except Exception:
        logger.exception("Failed to read tools manifest %s", mp)
        return set()

    # Valid JSON is not necessarily a JSON *object*; calling ``.get`` on a
    # list or scalar would raise outside the try block above.
    if not isinstance(data, dict):
        logger.error(
            "tools manifest %s must contain a JSON object, got %s — "
            "no tools will load",
            mp,
            type(data).__name__,
        )
        return set()

    allow = data.get("allow")
    if not allow:
        # Missing key, null, or empty list: nothing is allowed.
        return set()

    # A bare string would otherwise be iterated character by character and
    # silently produce a nonsensical allow-list.
    if not isinstance(allow, (list, tuple)):
        logger.error(
            "tools manifest %s: 'allow' must be a list of filenames, got %s — "
            "no tools will load",
            mp,
            type(allow).__name__,
        )
        return set()

    return {item for item in allow if isinstance(item, str)}
# [docs]
def append_tool_manifest(
    tool_basename: str,
    tools_dir: str | Path = "tools",
) -> None:
    """Add a tool basename to the manifest allow-list via an atomic rewrite.

    The current allow-list is read with ``load_tool_manifest_allowlist``,
    *tool_basename* is merged in, and the sorted result is serialised as
    ``{"allow": [...]}``. The JSON is first written to a temporary file
    created in the same directory and then moved over the manifest with
    ``os.replace``, so readers never observe a partially written file. If
    writing or replacing fails, the temporary file is removed (best effort)
    and the original exception is re-raised.

    Note:
        Only the ``allow`` key is written back. Because the allow-list
        reader returns an empty set for a manifest it cannot parse, calling
        this on an unreadable manifest replaces it with one that contains
        just *tool_basename*.

    Args:
        tool_basename: The ``*.py`` filename to allow (e.g. ``"foo.py"``).
        tools_dir: Directory holding the tool scripts and manifest; defaults
            to ``"tools"``.

    Raises:
        FileNotFoundError: If the manifest file does not already exist.
    """
    base_dir = Path(tools_dir)
    manifest = _manifest_path(base_dir)
    if not manifest.is_file():
        raise FileNotFoundError(f"tools manifest not found: {manifest}")

    entries = load_tool_manifest_allowlist(base_dir)
    entries.add(tool_basename)
    serialized = json.dumps(
        {"allow": sorted(entries)},
        indent=2,
        sort_keys=False,
    )

    # The temp file lives next to the manifest so os.replace stays on one
    # filesystem.
    handle, scratch = tempfile.mkstemp(
        prefix="manifest.",
        suffix=".json",
        dir=str(base_dir),
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            out.write(serialized)
        os.replace(scratch, manifest)
    except Exception:
        # Best-effort cleanup; the original failure is what the caller sees.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise

    logger.info("Updated tools manifest: added %s", tool_basename)
# Module attributes a single-tool script must define; load_tools skips (with a
# warning) any single-tool module that lacks one of them.
_REQUIRED_ATTRS = ("TOOL_NAME", "TOOL_DESCRIPTION", "TOOL_PARAMETERS", "run")
# [docs]
def load_tools(directory: str | Path, registry: ToolRegistry) -> None:
    """Discover, import, and register every allow-listed tool script.

    Scans *directory* for ``*.py`` files (sorted by path), skips names that
    start with ``_`` or are absent from the manifest allow-list returned by
    ``load_tool_manifest_allowlist``, and registers the tools each remaining
    module defines. An empty allow-list loads nothing.

    Two module layouts are supported:

    * **Multi-tool** -- a module-level ``TOOLS`` list of dicts with keys
      ``name``, ``description``, ``parameters``, ``handler`` and optional
      ``no_background`` / ``allow_repeat`` flags.
    * **Single-tool** -- ``TOOL_NAME``, ``TOOL_DESCRIPTION``,
      ``TOOL_PARAMETERS`` and a callable ``run``, with optional
      ``TOOL_NO_BACKGROUND``, ``TOOL_ALLOW_REPEAT`` and ``TOOL_ALIASES``.

    Loading is resilient: any exception raised while importing or
    registering one file is logged and the scan moves on to the next file.

    Side effects: modules are placed in ``sys.modules`` under
    ``tools.<stem>`` and ``_tool_<stem>``, and ``registry._tools`` is
    mutated in place.

    Args:
        directory: Path to the tools directory to scan (e.g. ``"tools"``).
            A missing directory is warned about and skipped.
        registry: The :class:`ToolRegistry` to populate; its internal
            ``_tools`` mapping is mutated in place and ``len(registry)`` is
            used for the final summary log line.
    """
    tools_path = Path(directory)
    if not tools_path.is_dir():
        logger.warning(
            "Tools directory does not exist: %s – skipped",
            tools_path,
        )
        return

    allowed = load_tool_manifest_allowlist(tools_path)
    if not allowed:
        logger.error(
            "Tool manifest allow-list is empty — no tools loaded from %s", tools_path
        )
        return

    for py_file in sorted(tools_path.glob("*.py")):
        if py_file.name.startswith("_"):
            continue  # skip __init__.py and other private/underscore files
        if py_file.name not in allowed:
            logger.warning(
                "Skipping tool file not in manifest allow-list: %s",
                py_file.name,
            )
            continue

        # Boundary handler: one bad tool file must never block the others.
        try:
            module = _import_tool_module(py_file)
            if module is None:
                continue

            tools_list = getattr(module, "TOOLS", None)
            if isinstance(tools_list, list):
                _register_tool_entries(registry, py_file, tools_list)
            else:
                _register_single_tool(registry, py_file, module)
        except Exception:
            logger.exception(
                "Failed to load tool from %s",
                py_file.name,
            )

    logger.info(
        "Tool loading complete – %d tool(s) registered",
        len(registry),
    )


def _import_tool_module(py_file: Path):
    """Return the module for *py_file*, executing the file only if needed.

    An already-imported ``tools.<stem>`` found in ``sys.modules`` is reused
    (and aliased as ``_tool_<stem>``) without re-execution. Otherwise the
    file is executed fresh and registered under both names. Returns ``None``
    when no import spec/loader can be created for the file.

    If executing the module raises, both ``sys.modules`` entries added here
    are rolled back before the exception propagates. Without the rollback a
    half-initialised module would be reused by every later load attempt and
    the file would never be re-executed, even after being fixed.
    """
    canonical = f"tools.{py_file.stem}"
    alias_name = f"_tool_{py_file.stem}"

    if canonical in sys.modules:
        module = sys.modules[canonical]
        sys.modules[alias_name] = module  # alias only, no re-exec
        return module

    spec = importlib.util.spec_from_file_location(canonical, py_file)
    if spec is None or spec.loader is None:
        logger.warning(
            "Could not create module spec for %s",
            py_file,
        )
        return None

    module = importlib.util.module_from_spec(spec)
    module.__package__ = "tools"
    # Register before executing so the module can be found by imports that
    # happen while its own body runs.
    sys.modules[canonical] = module
    sys.modules[alias_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        for key in (canonical, alias_name):
            if sys.modules.get(key) is module:
                del sys.modules[key]
        raise
    return module


def _register_tool_entries(
    registry: ToolRegistry,
    py_file: Path,
    entries: list,
) -> None:
    """Register every well-formed entry of a multi-tool ``TOOLS`` list.

    Malformed entries (non-dict, or missing name / description / parameters /
    callable handler) are logged and skipped individually so they do not
    prevent the remaining entries of the same file from loading.
    """
    count = 0
    for entry in entries:
        if not isinstance(entry, dict):
            logger.warning(
                "Tool %s: TOOLS entry of type %s is not a dict – skip",
                py_file.name,
                type(entry).__name__,
            )
            continue

        t_name = entry.get("name")
        t_desc = entry.get("description")
        t_params = entry.get("parameters")
        t_handler = entry.get("handler")
        valid = (
            t_name
            and t_desc
            and t_params is not None
            and callable(t_handler)
        )
        if not valid:
            logger.warning(
                "Tool %s: malformed TOOLS entry %r – skip",
                py_file.name,
                entry.get("name", "?"),
            )
            continue

        registry._tools[t_name] = ToolDefinition(
            name=t_name,
            description=t_desc,
            parameters=t_params,
            handler=t_handler,
            no_background=bool(entry.get("no_background")),
            allow_repeat=bool(entry.get("allow_repeat")),
        )
        count += 1

    logger.info(
        "Loaded %d tool(s) from %s (multi-tool)",
        count,
        py_file.name,
    )


def _register_single_tool(
    registry: ToolRegistry,
    py_file: Path,
    module,
) -> None:
    """Register a single-tool module and any valid ``TOOL_ALIASES``.

    The module is skipped with a warning if it lacks one of
    ``_REQUIRED_ATTRS`` or if ``run`` is not callable. Each alias gets its
    own ``ToolDefinition`` that shares the primary tool's description,
    parameters, handler, and flags.
    """
    missing = [attr for attr in _REQUIRED_ATTRS if not hasattr(module, attr)]
    if missing:
        logger.warning(
            "Tool %s missing attrs: %s – skip",
            py_file.name,
            ", ".join(missing),
        )
        return

    handler = module.run
    if not callable(handler):
        logger.warning(
            "Tool %s: 'run' not callable – skip",
            py_file.name,
        )
        return

    tool_name: str = module.TOOL_NAME
    tool_desc: str = module.TOOL_DESCRIPTION
    tool_params: dict = module.TOOL_PARAMETERS
    no_bg = bool(getattr(module, "TOOL_NO_BACKGROUND", False))
    allow_repeat = bool(getattr(module, "TOOL_ALLOW_REPEAT", False))

    registry._tools[tool_name] = ToolDefinition(
        name=tool_name,
        description=tool_desc,
        parameters=tool_params,
        handler=handler,
        no_background=no_bg,
        allow_repeat=allow_repeat,
    )
    logger.info(
        "Loaded tool: %s (%s)",
        tool_name,
        py_file.name,
    )

    aliases = getattr(module, "TOOL_ALIASES", None)
    if not (aliases and isinstance(aliases, (list, tuple))):
        return

    # Track what was actually registered: joining the raw list would raise
    # TypeError on a non-string alias and would also report skipped aliases.
    registered: list[str] = []
    for alias in aliases:
        if not isinstance(alias, str) or not alias.strip():
            continue
        registry._tools[alias] = ToolDefinition(
            name=alias,
            description=tool_desc,
            parameters=tool_params,
            handler=handler,
            no_background=no_bg,
            allow_repeat=allow_repeat,
        )
        registered.append(alias)

    if registered:
        logger.info(
            "Registered aliases for %s: %s",
            tool_name,
            ", ".join(registered),
        )