# Source code for jsonutil

"""JSON helpers backed by orjson; falls back to stdlib ``json`` when required.

Drop-in replacement for the stdlib :mod:`json` module used across every
Stargazer service -- callers do ``import jsonutil as json`` and then use the
familiar :func:`dumps`, :func:`loads`, :func:`dump`, and :func:`load` names. The
fast :mod:`orjson` path is taken whenever the requested options fall within its
capabilities; otherwise the call transparently routes to the stdlib serializer
(decided by :func:`_needs_stdlib_dumps` and :func:`_option_bits`) so behaviour
stays compatible. ``loads`` likewise retries on the stdlib decoder for inputs
orjson rejects (e.g. lone surrogates).

These functions perform no I/O of their own beyond what the caller's file handle
does; they encode and decode in memory and are exercised pervasively for Redis
payloads, tool arguments, cached blobs, and LLM/JSON responses throughout the
repo. ``JSONDecodeError`` is aliased to the stdlib exception so that a single
``except json.JSONDecodeError`` catches failures from either backend.
"""

from __future__ import annotations

import json as _stdlib_json
from typing import IO, Any, Callable

import orjson

# Re-export the stdlib decode error under this module's namespace. Callers that
# ``import jsonutil as json`` can then write ``except json.JSONDecodeError`` and
# catch decode failures from either backend (orjson or stdlib).
JSONDecodeError = _stdlib_json.JSONDecodeError


def _option_bits(*, indent: int | None, sort_keys: bool) -> int:
    """Translate stdlib-style serialization flags into an orjson option bitmask.

    Maps the ``indent`` and ``sort_keys`` arguments onto the corresponding
    ``orjson.OPT_*`` constants so the fast orjson path can honour them. Only the
    indents orjson natively supports are mapped; ``indent == 4`` falls back to
    ``OPT_INDENT_2`` when the running orjson lacks ``OPT_INDENT_4``.

    This is called by :func:`dumps` (its only caller) when assembling the
    ``option`` keyword for ``orjson.dumps``; it performs no I/O or other side
    effects.

    Args:
        indent: Indentation width (``2`` or ``4``) or ``None`` for compact
            output. Only ``2`` and ``4`` are recognised here; other values
            contribute no bits (the stdlib path handles them upstream).
        sort_keys: When ``True``, add ``OPT_SORT_KEYS`` so object keys are
            emitted in sorted order.

    Returns:
        int: A bitwise-OR of the selected ``orjson.OPT_*`` flags (``0`` when
        none apply), suitable for the ``option`` argument of ``orjson.dumps``.
    """
    opts = 0
    if sort_keys:
        opts |= orjson.OPT_SORT_KEYS
    if indent == 2:
        opts |= orjson.OPT_INDENT_2
    elif indent == 4:
        opts |= getattr(orjson, "OPT_INDENT_4", orjson.OPT_INDENT_2)
    return opts


def _needs_stdlib_dumps(
    *,
    skipkeys: bool,
    ensure_ascii: bool,
    check_circular: bool,
    allow_nan: bool,
    cls: type | None,
    indent: int | str | None,
    separators: tuple[str, str] | None,
) -> bool:
    """Decide whether a ``dumps`` call must fall back to the stdlib serializer.

    orjson is fast but supports only a subset of stdlib ``json.dumps`` behaviour.
    This predicate inspects the requested options and returns ``True`` whenever
    any of them lies outside orjson's capabilities -- e.g. ``skipkeys``, a custom
    encoder ``cls``, disabled ``check_circular``, ``allow_nan`` semantics
    differences, ``ensure_ascii`` escaping, string/unsupported integer indents,
    or non-default ``separators`` -- so the caller can route to
    ``_stdlib_json.dumps`` instead.

    This is called only by :func:`dumps`, which uses the result to choose its
    serialization backend; it performs no I/O or other side effects.

    Args:
        skipkeys: Stdlib flag to skip non-basic dict keys (unsupported by orjson).
        ensure_ascii: When ``True``, non-ASCII must be escaped (orjson always
            emits UTF-8, so this forces the stdlib path).
        check_circular: Stdlib circular-reference check; disabling it forces the
            stdlib path.
        allow_nan: When ``False``, NaN/Infinity must raise rather than serialize,
            forcing the stdlib path.
        cls: A custom ``JSONEncoder`` subclass; any value forces the stdlib path.
        indent: Indentation spec. String indents, and integer indents other than
            ``2`` or ``4``, force the stdlib path.
        separators: Item/key separators; anything other than ``None`` or the
            compact ``(",", ":")`` forces the stdlib path.

    Returns:
        bool: ``True`` if the stdlib serializer is required, ``False`` if the
        orjson fast path can satisfy the request.
    """
    if (
        skipkeys
        or cls is not None
        or not check_circular
        or not allow_nan
        or ensure_ascii
    ):
        return True
    if isinstance(indent, str):
        return True
    if isinstance(indent, int) and indent not in (2, 4):
        return True
    if separators is not None and separators != (",", ":"):
        return True
    return False


def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = False,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: type | None = None,
    indent: int | str | None = None,
    separators: tuple[str, str] | None = None,
    default: Callable[..., Any] | None = None,
    sort_keys: bool = False,
) -> str:
    """Serialize *obj* to a JSON string (UTF-8 text, like ``ensure_ascii=False``).

    The codebase's drop-in for ``json.dumps`` (imported as ``json``). The call
    is routed in three steps:

    1. :func:`_needs_stdlib_dumps` decides whether the requested options exceed
       what the orjson path accepts. If so, the call is delegated to
       ``_stdlib_json.dumps`` with every argument forwarded unchanged.
    2. Otherwise ``orjson.dumps`` is used, with ``indent``/``sort_keys``
       translated by :func:`_option_bits`.
    3. If orjson raises ``TypeError`` (e.g. lone surrogate code points in
       string values, which stdlib accepts), the call is retried on the stdlib
       serializer with ``ensure_ascii=False``. The retry uses separators chosen
       to match the layout of the orjson path (compact when not indented,
       ``": "`` after keys when indented), so whitespace does not depend on
       which backend produced the text.

    Unlike the stdlib default, ``ensure_ascii`` defaults to ``False`` here.
    Pure in-memory serialization; no I/O.

    Args:
        obj: The Python object to serialize.
        skipkeys: Skip non-basic dict keys (forces the stdlib path).
        ensure_ascii: Escape non-ASCII when ``True`` (forces the stdlib path).
        check_circular: Circular-reference detection; disabling it forces the
            stdlib path.
        allow_nan: Permit ``NaN``/``Infinity`` output; ``False`` forces the
            stdlib path.
        cls: Optional custom ``JSONEncoder`` subclass; any value forces the
            stdlib path.
        indent: Integer ``2`` or ``4`` use the orjson path; string indents and
            other integers force the stdlib path; ``None`` produces compact
            output.
        separators: Item/key separator pair; anything other than ``None`` or
            the compact ``(",", ":")`` forces the stdlib path.
        default: Fallback callable invoked for otherwise-unserializable values.
        sort_keys: Emit object keys in sorted order.

    Returns:
        str: The serialized JSON text.

    Raises:
        TypeError: Propagated from the stdlib serializer for objects that
            remain unserializable (e.g. when no suitable ``default`` is
            supplied).
    """
    if _needs_stdlib_dumps(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        cls=cls,
        indent=indent,
        separators=separators,
    ):
        return _stdlib_json.dumps(
            obj,
            skipkeys=skipkeys,
            ensure_ascii=ensure_ascii,
            check_circular=check_circular,
            allow_nan=allow_nan,
            cls=cls,
            indent=indent,
            separators=separators,
            default=default,
            sort_keys=sort_keys,
        )

    # Only integer indents reach this point with meaning; anything else is
    # treated as "no indentation" on both the fast path and the retry.
    int_indent = indent if isinstance(indent, int) else None

    kw: dict[str, Any] = {
        "option": _option_bits(indent=int_indent, sort_keys=sort_keys),
    }
    if default is not None:
        kw["default"] = default

    try:
        return orjson.dumps(obj, **kw).decode("utf-8")
    except TypeError:
        # e.g. lone surrogates in str values — stdlib json accepts these.
        # ``separators`` is either None or (",", ":") here. Passing it through
        # would give stdlib's spaced default (", ", ": ") for compact requests,
        # or drop the space after ":" for indented ones. Instead pick the pair
        # that mirrors orjson: fully compact when not indented, stdlib's
        # indented default (",", ": ") otherwise.
        fallback_separators = (",", ":") if int_indent is None else None
        return _stdlib_json.dumps(
            obj,
            indent=int_indent,
            separators=fallback_separators,
            default=default,
            sort_keys=sort_keys,
            ensure_ascii=False,
        )
def loads(s: str | bytes | bytearray | memoryview) -> Any:
    """Deserialize a JSON document from text or bytes into a Python object.

    Parses with ``orjson.loads`` first. If that raises ``JSONDecodeError``
    (e.g. for input containing lone surrogates), the input is retried with the
    stdlib ``json.loads``; bytes-like input is decoded as UTF-8 with
    ``errors="surrogatepass"`` so such code points survive.

    If the bytes cannot be decoded as UTF-8 even with ``surrogatepass``, the
    original ``JSONDecodeError`` is re-raised instead of letting a
    ``UnicodeDecodeError`` escape. A single ``except JSONDecodeError`` in the
    caller therefore covers every decode failure.

    This is the drop-in replacement for ``json.loads`` (the module is imported
    as ``json``). It performs no I/O of its own.

    Args:
        s: The JSON document as ``str``, ``bytes``, ``bytearray``, or
            ``memoryview`` (a ``memoryview`` is materialized to ``bytes`` only
            on the stdlib fallback).

    Returns:
        Any: The decoded Python object.

    Raises:
        JSONDecodeError: If the input is not valid JSON for either backend, or
            is bytes that are not decodable as UTF-8.
    """
    try:
        return orjson.loads(s)
    except JSONDecodeError as fast_path_error:
        if isinstance(s, memoryview):
            s = s.tobytes()
        if isinstance(s, (bytes, bytearray)):
            try:
                text = s.decode("utf-8", errors="surrogatepass")
            except UnicodeDecodeError:
                # Not UTF-8 at all: the stdlib retry cannot help, so surface
                # the decode error the caller is documented to expect.
                raise fast_path_error from None
        else:
            text = s
        return _stdlib_json.loads(text)
def load(fp: IO[str]) -> Any:
    """Parse the entire contents of a readable file-like object as JSON.

    The stream is consumed with a single ``fp.read()`` call and the result is
    handed to :func:`loads`, so the same decoding behaviour applies. This is
    the module's counterpart to ``json.load``; the only I/O it causes is the
    ``fp.read()`` call.

    Args:
        fp: An open, readable file-like object whose ``read()`` returns the
            JSON document.

    Returns:
        Any: The decoded Python object.

    Raises:
        JSONDecodeError: If the contents are not valid JSON.
    """
    document = fp.read()
    return loads(document)
def dump(
    obj: Any,
    fp: IO[str],
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = False,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: type | None = None,
    indent: int | str | None = None,
    separators: tuple[str, str] | None = None,
    default: Callable[..., Any] | None = None,
    sort_keys: bool = False,
) -> None:
    """Write *obj* as JSON text to a writable file-like object.

    The text is produced by :func:`dumps`, with every formatting option passed
    through untouched, and is written with one ``fp.write`` call. Backend
    selection and option compatibility are therefore exactly those of
    :func:`dumps`. This is the module's counterpart to ``json.dump``; the only
    I/O it causes is the ``fp.write`` call.

    Args:
        obj: The Python object to serialize.
        fp: An open, writable text file-like object that receives the output.
        skipkeys: Passed to :func:`dumps`.
        ensure_ascii: Passed to :func:`dumps`.
        check_circular: Passed to :func:`dumps`.
        allow_nan: Passed to :func:`dumps`.
        cls: Passed to :func:`dumps`.
        indent: Passed to :func:`dumps`.
        separators: Passed to :func:`dumps`.
        default: Passed to :func:`dumps`.
        sort_keys: Passed to :func:`dumps`.

    Raises:
        TypeError: Propagated from :func:`dumps` for unserializable objects.
    """
    forwarded: dict[str, Any] = {
        "skipkeys": skipkeys,
        "ensure_ascii": ensure_ascii,
        "check_circular": check_circular,
        "allow_nan": allow_nan,
        "cls": cls,
        "indent": indent,
        "separators": separators,
        "default": default,
        "sort_keys": sort_keys,
    }
    payload = dumps(obj, **forwarded)
    fp.write(payload)