Source code for ncm_delta_parser

"""NCM Delta Parser: resolves compact delta strings to full NCM vectors.

Parses the shorthand delta notation from reality_marble_recursion_index.yaml:
    "KOR+0.4 D1-0.3 OXT-0.2 5HT1A-0.2 ENT1↑ ACC↑ SERT.reversed"

Into a proper chemical vector dict:
    {"KAPPA_OPIOID_KOR": 0.4, "DOPAMINE_D1": -0.3, ...}
"""

from __future__ import annotations

import logging
import os
import re
from typing import Any, Dict, List, Optional, Tuple

import yaml

# Module-level logger; parse_delta_string and _load_emotion_index report
# unrecognized tokens and index-loading problems through it.
logger = logging.getLogger(__name__)

# Default activation magnitudes for directional arrows: a "↑" token adds
# _ARROW_UP_MAGNITUDE to its node and a "↓" token adds _ARROW_DOWN_MAGNITUDE.
_ARROW_UP_MAGNITUDE = 0.15
_ARROW_DOWN_MAGNITUDE = -0.15

# Regexes for parsing individual delta tokens.  Every pattern is anchored at
# both ends, so a token has to match in full.
# Numeric delta - groups: (abbreviation, sign, unsigned magnitude).  The
# magnitude needs at least one digit before any decimal point.
# Matches patterns like: D1+0.5, OXT-0.3, SIGMA+0.2, 5HT1A-0.2
_NUMERIC_DELTA_RE = re.compile(r"^([A-Za-z0-9_]+)([+-])(\d+\.?\d*)$")
# Arrow delta - groups: (abbreviation, arrow).
# Matches: ENT1↑, ACC↑, THYROID↓, PFC_DLPFC↑
_ARROW_DELTA_RE = re.compile(r"^([A-Za-z0-9_]+)(↑|↓)$")
# Reversed transporter - group: (abbreviation).
# Matches: SERT.reversed, DAT.reversed, NET.reversed
_REVERSED_RE = re.compile(r"^([A-Za-z0-9_]+)\.reversed$")


# ═══════════════════════════════════════════════════════════════════════
# ABBREVIATION → FULL NODE NAME RESOLUTION
# ═══════════════════════════════════════════════════════════════════════

# Compact abbreviations used in delta strings → canonical NCM node names.
# This table is derived from reality_marble_recursion_index.yaml
# BASE_ABBREV_EXPANSIONS and NCM_NODE_EXPANSIONS.
# Lookup is case-sensitive (note the mixed-case "ACh" and "ZnT3" keys).
# Every key must appear exactly once: a repeated key in a dict literal is
# silently overwritten by the later entry, which hides copy/paste mistakes.
ABBREV_TO_NODE: Dict[str, str] = {
    # Core hormones / neurotransmitters
    "OXT": "OXYTOCIN_NEUROMIRROR",
    "OXYTOCIN": "OXYTOCIN_NEUROMIRROR",
    "AVP": "VASOPRESSIN_GUARD",
    "VASOPRESSIN": "VASOPRESSIN_GUARD",
    "CORT": "CORTISOL_PRESSURE",
    "CORTISOL": "CORTISOL_PRESSURE",
    "ADR": "ADRENALINE_RUSH",
    "ADRENALINE": "ADRENALINE_RUSH",
    "MELATONIN": "MELATONIN_DARK",
    # Serotonin system
    "5HT": "SEROTONERGIC_WARMTH",
    "5HT1A": "SEROTONIN_5HT1A",
    "5HT2A": "SEROTONIN_5HT2A",
    "5HT2C": "SEROTONIN_5HT2C",
    "5HT3": "SEROTONIN_5HT3",
    "5HT7": "SEROTONIN_5HT7",
    "5HT1E": "SEROTONIN_5HT1E",
    "5HT1F": "SEROTONIN_5HT1F",
    "5HT2B": "SEROTONIN_5HT2B",
    "5HT5A": "SEROTONIN_5HT5A",
    # Dopamine system
    "DA": "DOPAMINERGIC_CRAVE",
    "D1": "DOPAMINE_D1",
    "D2": "DOPAMINE_D2",
    "D4": "DOPAMINE_D4",
    "D5": "DOPAMINE_D5",
    # Norepinephrine
    "NE": "NORADRENERGIC_VIGILANCE",
    # GABA / glutamate
    "GABA": "GABA_ERGIC_CALM",
    "NMDA": "NMDA_CORE",
    "ACh": "ACETYLCHOLINE_FOCUS",
    # Opioid system
    "MOR": "MU_OPIOID_MOR",
    "KOR": "KAPPA_OPIOID_KOR",
    # Other core nodes
    "SIGMA": "SIGMA_RECEPTOR_META",
    "DMT": "DMT_ENDOGENOUS",
    "TAAR": "TAAR_TRACE_SALIENCE",
    "THYROID": "THYROID_T3T4_TEMPO",
    "HISTAMINE": "HISTAMINE_ALERT",
    # Transporters
    "SERT": "SERT_ACTIVITY",
    "DAT": "DAT_ACTIVITY",
    "NET": "NET_ACTIVITY",
    "VMAT2": "VMAT2_PACK",
    "EAAT2": "EAAT2_CLEAR",
    "ENT1": "ENT1_ACTIVITY",
    "ABCB1": "ABCB1_EFFLUX",
    "ZnT3": "ZnT3_PACK",
    "PMAT": "PMAT_UPTAKE",
    "OCT3": "OCT3_FLUX",
    # Enzyme clearance (kept at this position so iteration order is unchanged)
    "MAO_A": "MAO_A_ACTIVITY",
    "COMT": "COMT_ACTIVITY",
    # Brain regions
    "ACC": "ACC_SAL",
    "NACC": "NACC_VENTRAL_STR",
    "PFC": "PFC_DLPFC",
    "DLPFC": "PFC_DLPFC",
    "LC": "LC_NOR",
    "DRN": "RAPHE_DRN",
    "MPFC": "PFC_DLPFC",
    "OFC": "PFC_DLPFC",
    "PCC": "PFC_DLPFC",
    "TPJ": "PFC_DLPFC",
    "VTA": "VTA",
    "AMYGDALA": "AMYGDALA",
    "HIPPOCAMPUS": "HIPPOCAMPUS",
    "HYPOTHALAMUS": "HYPOTHALAMUS",
    "PAG": "PAG",
    "THALAMUS": "THALAMUS",
    "INSULA": "INSULA_INTERO",
    # Chloride homeostasis
    "NKCC1": "NKCC1_CHLORIDE",
    "KCC2": "KCC2_CHLORIDE",
    # Endocannabinoid / opioid (vape cart aliases)
    "CB1": "ENDOCANNABINOID_CB1",
    "THC": "ENDOCANNABINOID_CB1",  # THC acts primarily via CB1
    # CB2 removed — ENDOCANNABINOID_CB2 not modelled in CNS index
    "ENDO_BLISS": "ENDORPHINIC_BLISS",
    # Hormones / peptides (vape cart aliases)
    "PRL": "PROLACTIN_SATIATION",
    "PROLACTIN": "PROLACTIN_SATIATION",
    "T": "TESTOSTERONE_T",
    "GHSR": "GHSR_GHRELIN",
    "LEPTIN": "LEPTIN_RECEPTOR",
    "OREXIN": "OREXIN_SEEK",
    "SUBST_P": "SUBSTANCE_P_NK1",
    "SUBSTANCE_P": "SUBSTANCE_P_NK1",
    # TRP channels / somatosensory (vape cart aliases)
    "TRPV1": "TRPV1_HEAT",
    "TRPM8": "TRPM8_COOL",
    "TRPA1": "TRPA1_PRICKLE",
    "PIEZO2": "PIEZO2_TOUCH",
    # Other (vape cart aliases)
    "HIST": "HISTAMINE_ALERT",
    "ADENOSINE_A1": "ADENOSINE_A1",
    "MINERALOCORTICOID_MR": "MINERALOCORTICOID_MR",
    # Cholinergic system
    "CHT1": "CHT1_UPTAKE",
    "MUSCARINIC_M3": "MUSCARINIC_M3",
    "MUSCARINIC_M4": "MUSCARINIC_M4",
    # Histaminergic receptors
    "HISTAMINE_H1": "HISTAMINE_H1",
    "HISTAMINE_H2": "HISTAMINE_H2",
    "HISTAMINE_H4": "HISTAMINE_H4",
    # Nucleoside/transporter extensions
    "ENT2": "ENT2_ACTIVITY",
    "ADENOSINE_A2A": "ADENOSINE_A2A",
    # Stress / bonding peptide receptors
    "AVPR1B": "AVPR1B_STRESS",
    "OXTR_PAIR": "OXTR_PAIR",
    # Melatonin / metabolic
    "MELATONIN_MT2": "MELATONIN_MT2",
    "PPAR_GAMMA": "PPAR_GAMMA_EASE",
    # Glutamate receptors
    "KAINATE": "KAINATE_GluK",
    # Organic cation transporters
    "OCT1": "OCT1_FLUX",
}

# Reversal magnitude for transporter .reversed tokens.
# Negative = clearance disrupted (transporter running backwards).
# parse_delta_string adds this to the node named by any "<X>.reversed" token.
_REVERSAL_MAGNITUDE = -0.35

# When a transporter is reversed, the parent neurotransmitter floods
# the synapse. Map transporter → (parent NT node, boost magnitude).
# Keys are resolved (canonical) node names, not abbreviations; transporters
# absent from this table receive only _REVERSAL_MAGNITUDE.
_REVERSAL_NT_BOOST: Dict[str, Tuple[str, float]] = {
    "SERT_ACTIVITY": ("SEROTONERGIC_WARMTH", 0.30),
    "DAT_ACTIVITY": ("DOPAMINERGIC_CRAVE", 0.35),
    "NET_ACTIVITY": ("NORADRENERGIC_VIGILANCE", 0.25),
}


def resolve_node_name(abbrev: str) -> str:
    """Map a shorthand token to its canonical NCM node name.

    The token is looked up verbatim (the match is case-sensitive) in the
    module-level ``ABBREV_TO_NODE`` table, e.g. ``"KOR"`` becomes
    ``"KAPPA_OPIOID_KOR"``.  A token without an entry is handed back
    untouched, on the assumption that it already is a full node name.  The
    lookup has no side effects.

    ``parse_delta_string`` in this module calls it for every parsed token;
    earlier documentation also names ``flavor_engine.py`` as an external
    caller (not visible from this file).

    Args:
        abbrev: Shorthand token or candidate node name to resolve.

    Returns:
        str: The mapped node name, or ``abbrev`` itself when unmapped.
    """
    try:
        return ABBREV_TO_NODE[abbrev]
    except KeyError:
        # Not a known abbreviation: treat the input as the node name.
        return abbrev
# ═══════════════════════════════════════════════════════════════════════
# DELTA STRING PARSER
# ═══════════════════════════════════════════════════════════════════════
def parse_delta_string(delta_str: str) -> Dict[str, float]:
    """Parse a compact NCM delta string into a resolved chemical vector.

    The string is split on whitespace and each token is classified as one
    of the following, tried in this order:

    * numeric delta (``D1+0.5``, ``OXT-0.3``): adds the signed magnitude;
    * arrow delta (``ENT1↑``, ``THYROID↓``): adds ``_ARROW_UP_MAGNITUDE``
      or ``_ARROW_DOWN_MAGNITUDE``;
    * reversed transporter (``SERT.reversed``): adds
      ``_REVERSAL_MAGNITUDE`` (clearance disrupted) and, for transporters
      listed in ``_REVERSAL_NT_BOOST``, also boosts the parent
      neurotransmitter to model the resulting synaptic flood;
    * bare abbreviation present in ``ABBREV_TO_NODE`` (``VTA``, ``KOR``):
      adds a small fixed activation of 0.1.

    Anything else is logged as a warning and ignored.  Contributions that
    resolve to the same node accumulate.

    Examples::

        >>> parse_delta_string("KOR+0.4 D1-0.3 OXT-0.2 ENT1↑ ACC↑")
        {'KAPPA_OPIOID_KOR': 0.4, 'DOPAMINE_D1': -0.3,
         'OXYTOCIN_NEUROMIRROR': -0.2, 'ENT1_ACTIVITY': 0.15,
         'ACC_SAL': 0.15}

        >>> parse_delta_string("SERT.reversed DAT.reversed")
        {'SERT_ACTIVITY': -0.35, 'SEROTONERGIC_WARMTH': 0.3,
         'DAT_ACTIVITY': -0.35, 'DOPAMINERGIC_CRAVE': 0.35}

    Args:
        delta_str: Whitespace-separated delta tokens.  ``None``, empty and
            whitespace-only input produce an empty vector.

    Returns:
        Dict[str, float]: Canonical node name mapped to its summed delta,
        in order of first appearance.
    """
    if not delta_str:
        return {}

    # Activation applied to a bare, recognized abbreviation.
    bare_token_magnitude = 0.1

    result: Dict[str, float] = {}

    # str.split() without a separator already discards leading, trailing and
    # repeated whitespace, so every token is non-empty and needs no strip().
    for token in delta_str.split():
        # Numeric delta: D1+0.5, OXT-0.3
        m = _NUMERIC_DELTA_RE.match(token)
        if m:
            abbrev, sign, magnitude = m.groups()
            val = float(magnitude)
            if sign == "-":
                val = -val
            node = resolve_node_name(abbrev)
            result[node] = result.get(node, 0.0) + val
            continue

        # Arrow delta: ENT1↑, THYROID↓
        m = _ARROW_DELTA_RE.match(token)
        if m:
            abbrev, arrow = m.groups()
            val = _ARROW_UP_MAGNITUDE if arrow == "↑" else _ARROW_DOWN_MAGNITUDE
            node = resolve_node_name(abbrev)
            result[node] = result.get(node, 0.0) + val
            continue

        # Reversed transporter: SERT.reversed
        m = _REVERSED_RE.match(token)
        if m:
            node = resolve_node_name(m.group(1))
            result[node] = result.get(node, 0.0) + _REVERSAL_MAGNITUDE
            # Also boost the parent neurotransmitter (synaptic flood).
            boost = _REVERSAL_NT_BOOST.get(node)
            if boost is not None:
                nt_node, nt_mag = boost
                result[nt_node] = result.get(nt_node, 0.0) + nt_mag
            continue

        # Bare abbreviation: small default activation.  Membership is tested
        # against the table itself rather than "resolved name != token",
        # because several abbreviations (VTA, AMYGDALA, PAG, ...) map to
        # themselves and would otherwise be reported as unrecognized.
        if token in ABBREV_TO_NODE:
            node = ABBREV_TO_NODE[token]
            result[node] = result.get(node, 0.0) + bare_token_magnitude
        else:
            logger.warning("Unrecognized delta token: %s", token)

    return result
# ═══════════════════════════════════════════════════════════════════════
# EMOTION INDEX LOADER
# ═══════════════════════════════════════════════════════════════════════

# Process-wide memo for _load_emotion_index(); None means "not loaded yet",
# while an empty dict means "loaded, but nothing usable was found".
_emotion_index_cache: Optional[Dict[str, Dict[str, Any]]] = None


def _load_emotion_index() -> Dict[str, Dict[str, Any]]:
    """Load and memoize the emotion-to-delta index from the recursion YAML.

    Reads ``reality_marble_recursion_index.yaml`` from the directory that
    contains this module and builds a mapping of normalized emotion name
    to a record holding ``id``, ``cart``, ``affect``, the raw ``delta_str``,
    the parsed ``delta_vector`` (via ``parse_delta_string``),
    ``emotional_weight`` and, when present, the optional ``narrative_feel``,
    ``hooks`` and ``ui_cues`` fields.  Expansion tables such as
    ``BASE_ABBREV_EXPANSIONS`` are skipped; only top-level entries that are
    mappings carrying a ``delta`` key are treated as emotions.

    The result is stored in the module-level ``_emotion_index_cache`` so the
    file read and YAML parse happen at most once per process; later calls
    return the same dict object.  A missing file, a read/parse error, or a
    document whose root is not a mapping (including an empty file, for
    which ``yaml.safe_load`` returns ``None``) is logged and cached as an
    empty dict instead of raising, so callers degrade gracefully.  A
    ``delta`` value that is not a string yields an empty ``delta_vector``.

    Used by ``get_emotion_delta``, ``get_all_emotions`` and
    ``scan_text_for_triggers`` in this module.

    Returns:
        Dict[str, Dict[str, Any]]: The cached emotion index keyed by
        normalized (uppercased, spaces replaced by underscores) emotion
        name; empty if the YAML is missing, unreadable or malformed.
    """
    global _emotion_index_cache
    if _emotion_index_cache is not None:
        return _emotion_index_cache

    module_dir = os.path.dirname(os.path.abspath(__file__))
    yaml_path = os.path.join(module_dir, "reality_marble_recursion_index.yaml")

    if not os.path.exists(yaml_path):
        logger.warning("Recursion index not found at %s", yaml_path)
        _emotion_index_cache = {}
        return _emotion_index_cache

    try:
        with open(yaml_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: loading is best-effort and must never take the
        # caller down, whatever the I/O or parser failure is.
        logger.error("Failed to load recursion index: %s", e)
        _emotion_index_cache = {}
        return _emotion_index_cache

    # safe_load() returns None for an empty document and a list or scalar
    # for a non-mapping root; none of those can be iterated as key/value.
    if not isinstance(data, dict):
        logger.error(
            "Recursion index at %s has no top-level mapping (got %s)",
            yaml_path,
            type(data).__name__,
        )
        _emotion_index_cache = {}
        return _emotion_index_cache

    # Extract emotion entries (top-level mappings that carry a 'delta' key).
    emotions: Dict[str, Dict[str, Any]] = {}
    skip_keys = {
        "BASE_ABBREV_EXPANSIONS",
        "NCM_NODE_EXPANSIONS",
        "REGIONAL_EXPANSIONS",
    }

    for key, value in data.items():
        if key in skip_keys:
            continue
        if not isinstance(value, dict) or "delta" not in value:
            continue

        # YAML keys are not necessarily strings (e.g. a bare number).
        name = str(key).upper().replace(" ", "_")

        raw_delta = value["delta"]
        if isinstance(raw_delta, str):
            delta_vector = parse_delta_string(raw_delta)
        else:
            if raw_delta is not None:
                logger.warning(
                    "Ignoring non-string delta for %s: %r", name, raw_delta
                )
            delta_vector = {}

        entry: Dict[str, Any] = {
            "id": value.get("id", -1),
            "cart": value.get("cart", ""),
            "affect": value.get("affect", ""),
            "delta_str": raw_delta,
            "delta_vector": delta_vector,
            "emotional_weight": value.get("emotional_weight", ""),
        }
        for optional in ("narrative_feel", "hooks", "ui_cues"):
            if optional in value:
                entry[optional] = value[optional]
        emotions[name] = entry

    _emotion_index_cache = emotions
    logger.info("Loaded %d emotion delta mappings from recursion index", len(emotions))
    return _emotion_index_cache
def get_emotion_delta(emotion_name: str) -> Dict[str, float]:
    """Look up the parsed NCM delta vector of a single emotion.

    ``emotion_name`` is uppercased and its spaces are turned into
    underscores before it is looked up in the cached index returned by
    ``_load_emotion_index``.  The caller receives a shallow copy of the
    stored ``delta_vector``, so editing the result leaves the shared cache
    untouched.  Unknown emotions give an empty dict instead of an error.

    Earlier documentation names ``ncm_semantic_triggers.py`` as the external
    caller (not visible from this file).

    Args:
        emotion_name: Emotion name in any casing/spacing, e.g.
            ``"warm glow"``.

    Returns:
        Dict[str, float]: Fresh node-to-magnitude mapping for the emotion,
        or ``{}`` when the emotion is not in the index.
    """
    emotions = _load_emotion_index()
    record = emotions.get(emotion_name.upper().replace(" ", "_"))
    return record["delta_vector"].copy() if record else {}
def get_all_emotions() -> Dict[str, Dict[str, Any]]:
    """Expose the complete emotion index, loading it on first use.

    Public accessor that simply forwards to ``_load_emotion_index``.  The
    object handed back is the cached dict itself, not a copy, so callers
    should treat it as read-only.

    Earlier documentation names ``limbic_system/engine.py``,
    ``ncm_semantic_triggers.py`` and scripts under ``scripts/`` as external
    callers (not visible from this file).

    Returns:
        Dict[str, Dict[str, Any]]: The cached index keyed by normalized
        emotion name; empty when the recursion YAML could not be loaded.
    """
    emotion_index = _load_emotion_index()
    return emotion_index
def scan_text_for_triggers(
    text: str,
    trigger_lexicon: Optional[List[str]] = None,
) -> List[Tuple[str, Dict[str, float]]]:
    """Find emotion triggers mentioned in ``text`` and return their deltas.

    Matching is a case-insensitive substring test: each trigger is
    uppercased, its underscores are turned into spaces, and the result is
    searched for anywhere in the uppercased text.  A trigger only produces
    a match when its normalized name (uppercase, spaces as underscores) is
    present in the emotion index, and each emotion is reported at most once.

    Parameters
    ----------
    text:
        The text to scan (user message or bot response).
    trigger_lexicon:
        Optional explicit list of trigger words.  If None, every emotion
        name from the recursion index is used.

    Returns
    -------
    List of (emotion_name, delta_vector) tuples, in lexicon order; each
    delta vector is a copy of the cached one.
    """
    emotions = _load_emotion_index()
    candidates = list(emotions) if trigger_lexicon is None else trigger_lexicon

    haystack = text.upper()
    hits: List[Tuple[str, Dict[str, float]]] = []
    reported = set()

    for word in candidates:
        shouted = word.upper()
        emotion_key = shouted.replace(" ", "_")  # form used as index key
        needle = shouted.replace("_", " ")  # form expected in free text

        if emotion_key in reported or needle not in haystack:
            continue

        record = emotions.get(emotion_key)
        if not record:
            continue

        hits.append((emotion_key, record["delta_vector"].copy()))
        reported.add(emotion_key)

    return hits
def combine_deltas(
    delta_list: List[Dict[str, float]],
    scale: float = 1.0,
) -> Dict[str, float]:
    """Merge several delta vectors into a single dampened, clamped vector.

    Values are summed per node across all vectors.  When more than one
    vector is supplied the sums are divided by ``sqrt(N)`` so simultaneous
    emotions stack without running away:

        combined = scale * Σ(delta_i) / sqrt(N)

    ``N`` is the number of vectors passed in (empty ones included).  Each
    final value is limited to the range ``[-3.0, 3.0]``.

    Args:
        delta_list: Delta vectors to merge; an empty list gives ``{}``.
        scale: Extra multiplier applied after dampening.

    Returns:
        Dict[str, float]: Node name mapped to its combined value, ordered by
        first appearance across ``delta_list``.
    """
    if not delta_list:
        return {}

    import math

    count = len(delta_list)
    dampening = 1.0 if count <= 1 else 1.0 / math.sqrt(count)

    # Plain per-node sums, before any dampening or scaling.
    totals: Dict[str, float] = {}
    for vector in delta_list:
        for node, amount in vector.items():
            totals[node] = totals.get(node, 0.0) + amount

    # Dampen, scale, then clamp each node to [-3.0, 3.0].
    return {
        node: max(-3.0, min(3.0, total * dampening * scale))
        for node, total in totals.items()
    }