"""JSON helpers backed by orjson; falls back to stdlib ``json`` when required.
Drop-in replacement for the stdlib :mod:`json` module used across every
Stargazer service -- callers do ``import jsonutil as json`` and then use the
familiar :func:`dumps`, :func:`loads`, :func:`dump`, and :func:`load` names. The
fast :mod:`orjson` path is taken whenever the requested options fall within its
capabilities; otherwise the call transparently routes to the stdlib serializer
(decided by :func:`_needs_stdlib_dumps` and :func:`_option_bits`) so behaviour
stays compatible. ``loads`` likewise retries on the stdlib decoder for inputs
orjson rejects (e.g. lone surrogates).
These functions perform no I/O of their own beyond what the caller's file handle
does; they encode and decode in memory and are exercised pervasively for Redis
payloads, tool arguments, cached blobs, and LLM/JSON responses throughout the
repo. ``JSONDecodeError`` is aliased to the stdlib exception so that a single
``except json.JSONDecodeError`` catches failures from either backend.
"""
from __future__ import annotations
import json as _stdlib_json
from typing import IO, Any, Callable
import orjson
# Base class so ``except json.JSONDecodeError`` catches orjson and stdlib errors.
JSONDecodeError = _stdlib_json.JSONDecodeError
def _option_bits(*, indent: int | None, sort_keys: bool) -> int:
    """Build the ``orjson.dumps`` ``option`` bitmask from stdlib-style flags.

    Args:
        indent: Requested indentation width. ``2`` selects
            ``orjson.OPT_INDENT_2``. ``4`` selects ``orjson.OPT_INDENT_4`` if
            the installed orjson exposes that attribute and otherwise falls
            back to ``orjson.OPT_INDENT_2``. Any other value (including
            ``None``) contributes no indentation flag.
        sort_keys: When true, ``orjson.OPT_SORT_KEYS`` is included.

    Returns:
        The bitwise OR of the selected flags, or ``0`` when none apply.
    """
    sort_flag = orjson.OPT_SORT_KEYS if sort_keys else 0

    if indent == 2:
        indent_flag = orjson.OPT_INDENT_2
    elif indent == 4:
        # Feature-detect: fall back to the two-space flag when the four-space
        # one does not exist in this orjson build.
        indent_flag = getattr(orjson, "OPT_INDENT_4", orjson.OPT_INDENT_2)
    else:
        indent_flag = 0

    return sort_flag | indent_flag
def _needs_stdlib_dumps(
*,
skipkeys: bool,
ensure_ascii: bool,
check_circular: bool,
allow_nan: bool,
cls: type | None,
indent: int | str | None,
separators: tuple[str, str] | None,
) -> bool:
"""Decide whether a ``dumps`` call must fall back to the stdlib serializer.
orjson is fast but supports only a subset of stdlib ``json.dumps`` behaviour.
This predicate inspects the requested options and returns ``True`` whenever
any of them lies outside orjson's capabilities -- e.g. ``skipkeys``, a custom
encoder ``cls``, disabled ``check_circular``, ``allow_nan`` semantics
differences, ``ensure_ascii`` escaping, string/unsupported integer indents,
or non-default ``separators`` -- so the caller can route to
``_stdlib_json.dumps`` instead.
This is called only by :func:`dumps`, which uses the result to choose its
serialization backend; it performs no I/O or other side effects.
Args:
skipkeys: Stdlib flag to skip non-basic dict keys (unsupported by orjson).
ensure_ascii: When ``True``, non-ASCII must be escaped (orjson always
emits UTF-8, so this forces the stdlib path).
check_circular: Stdlib circular-reference check; disabling it forces the
stdlib path.
allow_nan: When ``False``, NaN/Infinity must raise rather than serialize,
forcing the stdlib path.
cls: A custom ``JSONEncoder`` subclass; any value forces the stdlib path.
indent: Indentation spec. String indents, and integer indents other than
``2`` or ``4``, force the stdlib path.
separators: Item/key separators; anything other than ``None`` or the
compact ``(",", ":")`` forces the stdlib path.
Returns:
bool: ``True`` if the stdlib serializer is required, ``False`` if the
orjson fast path can satisfy the request.
"""
if (
skipkeys
or cls is not None
or not check_circular
or not allow_nan
or ensure_ascii
):
return True
if isinstance(indent, str):
return True
if isinstance(indent, int) and indent not in (2, 4):
return True
if separators is not None and separators != (",", ":"):
return True
return False
[docs]
def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = False,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: type | None = None,
    indent: int | str | None = None,
    separators: tuple[str, str] | None = None,
    default: Callable[..., Any] | None = None,
    sort_keys: bool = False,
) -> str:
    """Serialize *obj* to a JSON string, preferring orjson over stdlib ``json``.

    The call is routed in three steps:

    1. :func:`_needs_stdlib_dumps` is consulted; if it reports that the options
       are outside the fast path, the call is delegated to
       ``_stdlib_json.dumps`` with every argument forwarded unchanged.
    2. Otherwise ``orjson.dumps`` is used, with ``indent``/``sort_keys``
       translated to an option bitmask by :func:`_option_bits`, and the
       resulting bytes are decoded as UTF-8.
    3. If orjson raises ``TypeError`` (for example for strings containing lone
       surrogates), the object is re-serialized with the stdlib encoder using
       ``ensure_ascii=False``. When neither an indent nor explicit separators
       were requested, this retry uses the compact ``(",", ":")`` separators so
       its layout matches the fast path's compact output instead of the
       stdlib's spaced default.

    Note that ``ensure_ascii`` defaults to ``False`` here, unlike the stdlib.
    The function itself performs no I/O.

    Args:
        obj: The Python object to serialize.
        skipkeys: Stdlib option; evaluated by :func:`_needs_stdlib_dumps`.
        ensure_ascii: Escape non-ASCII characters when ``True``; evaluated by
            :func:`_needs_stdlib_dumps`.
        check_circular: Stdlib option; evaluated by :func:`_needs_stdlib_dumps`.
        allow_nan: Stdlib option; evaluated by :func:`_needs_stdlib_dumps`.
        cls: Optional custom encoder class; evaluated by
            :func:`_needs_stdlib_dumps`.
        indent: Indentation spec; ``None`` produces compact output. Which
            values stay on the fast path is decided by
            :func:`_needs_stdlib_dumps`.
        separators: Item/key separator pair, or ``None`` for the backend's
            default layout.
        default: Fallback callable for otherwise-unserializable values. It is
            passed to whichever backend runs, so it may be invoked again if the
            stdlib retry in step 3 happens.
        sort_keys: Emit object keys in sorted order.

    Returns:
        The serialized JSON text.

    Raises:
        TypeError: Propagated from the stdlib serializer when the object cannot
            be serialized by it either.
    """
    if _needs_stdlib_dumps(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        cls=cls,
        indent=indent,
        separators=separators,
    ):
        return _stdlib_json.dumps(
            obj,
            skipkeys=skipkeys,
            ensure_ascii=ensure_ascii,
            check_circular=check_circular,
            allow_nan=allow_nan,
            cls=cls,
            indent=indent,
            separators=separators,
            default=default,
            sort_keys=sort_keys,
        )

    int_indent = indent if isinstance(indent, int) else None
    kw: dict[str, Any] = {
        "option": _option_bits(indent=int_indent, sort_keys=sort_keys),
    }
    if default is not None:
        kw["default"] = default

    try:
        return orjson.dumps(obj, **kw).decode("utf-8")
    except TypeError:
        # e.g. lone surrogates in str values -- stdlib json accepts these.
        fallback_separators = separators
        if fallback_separators is None and int_indent is None:
            # Without an indent the stdlib default is (", ", ": "), whereas the
            # fast path emits compact JSON; keep the two layouts identical so
            # the output format does not depend on which backend ran.
            fallback_separators = (",", ":")
        return _stdlib_json.dumps(
            obj,
            indent=int_indent,
            separators=fallback_separators,
            default=default,
            sort_keys=sort_keys,
            ensure_ascii=False,
        )
[docs]
def loads(s: str | bytes | bytearray | memoryview) -> Any:
    """Deserialize a JSON document from text or bytes into a Python object.

    ``orjson.loads`` is tried first. If it raises ``JSONDecodeError`` the
    document is retried with the stdlib decoder: byte-like input is decoded as
    UTF-8 with ``errors="surrogatepass"`` (so encoded lone surrogates survive)
    and the resulting text is passed to ``_stdlib_json.loads``.

    If the bytes cannot be decoded as UTF-8 even with ``surrogatepass``, the
    stdlib decoder has nothing to work with, so the original orjson
    ``JSONDecodeError`` is re-raised. This keeps the documented contract that
    malformed input surfaces as ``JSONDecodeError`` rather than leaking a
    ``UnicodeDecodeError`` from the retry path.

    The function performs no I/O.

    Args:
        s: The JSON document as ``str``, ``bytes``, ``bytearray`` or
            ``memoryview``. A ``memoryview`` is copied to ``bytes`` only when
            the stdlib retry is needed.

    Returns:
        The decoded Python object.

    Raises:
        JSONDecodeError: If neither backend can parse the input, including
            byte input that is not valid UTF-8.
    """
    try:
        return orjson.loads(s)
    except JSONDecodeError as orjson_error:
        if isinstance(s, memoryview):
            s = s.tobytes()
        if not isinstance(s, (bytes, bytearray)):
            return _stdlib_json.loads(s)
        try:
            text = s.decode("utf-8", errors="surrogatepass")
        except UnicodeDecodeError:
            # Not UTF-8 at all: report the parse failure orjson already
            # diagnosed instead of an unrelated decoding error.
            raise orjson_error from None
        return _stdlib_json.loads(text)
[docs]
def load(fp: IO[str]) -> Any:
    """Parse the entire contents of a readable file-like object as JSON.

    The stream is consumed with a single ``fp.read()`` call and the result is
    handed to :func:`loads`, so the same backend selection and error behaviour
    apply.

    Args:
        fp: An open, readable file-like object whose remaining contents form
            one JSON document.

    Returns:
        The decoded Python object.

    Raises:
        JSONDecodeError: Propagated from :func:`loads` when the contents are
            not valid JSON.
    """
    document = fp.read()
    return loads(document)
[docs]
def dump(
    obj: Any,
    fp: IO[str],
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = False,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: type | None = None,
    indent: int | str | None = None,
    separators: tuple[str, str] | None = None,
    default: Callable[..., Any] | None = None,
    sort_keys: bool = False,
) -> None:
    """Serialize *obj* with :func:`dumps` and write the text to *fp*.

    Every formatting option is passed through to :func:`dumps` unchanged, so
    backend selection and output format are exactly those of :func:`dumps`.
    The serialized document is written with one ``fp.write`` call.

    Args:
        obj: The Python object to serialize.
        fp: An open, writable text file-like object that receives the JSON.
        skipkeys: Passed through to :func:`dumps`.
        ensure_ascii: Passed through to :func:`dumps`.
        check_circular: Passed through to :func:`dumps`.
        allow_nan: Passed through to :func:`dumps`.
        cls: Passed through to :func:`dumps`.
        indent: Passed through to :func:`dumps`.
        separators: Passed through to :func:`dumps`.
        default: Passed through to :func:`dumps`.
        sort_keys: Passed through to :func:`dumps`.

    Raises:
        TypeError: Propagated from :func:`dumps` when *obj* cannot be
            serialized.
    """
    options: dict[str, Any] = {
        "skipkeys": skipkeys,
        "ensure_ascii": ensure_ascii,
        "check_circular": check_circular,
        "allow_nan": allow_nan,
        "cls": cls,
        "indent": indent,
        "separators": separators,
        "default": default,
        "sort_keys": sort_keys,
    }
    # Serialize fully before touching the stream so the write is one call.
    payload = dumps(obj, **options)
    fp.write(payload)