Source code for terpene_engine

"""Terpene Engine -- Cannabis phytochemistry modulation layer.

Loads terpene profiles and strain definitions from terpene_profiles.yaml,
computes composite NCM deltas per strain, resolves entourage synergies,
and provides the sativa/indica gradient interpolation used by the
cascade engine for bipolar ENDOCANNABINOID_DRIFT staging.

# 🔥 the stoner goddess gets her pharmacology right 💀
"""

from __future__ import annotations

import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import yaml

logger = logging.getLogger(__name__)

# 💀 Delta parsing -- reuse from ncm_delta_parser if available
try:
    from ncm_delta_parser import parse_delta_string, resolve_node_name
except ImportError:
    import re as _re

    _DELTA_RE = _re.compile(r"([A-Za-z0-9_]+)([+-])(\d+\.?\d*)")

    def parse_delta_string(ds: str) -> Dict[str, float]:
        """Parse a compact ``NODE±VALUE`` delta string into a node -> delta dict.

        Fallback used only when ``ncm_delta_parser`` cannot be imported (this
        definition lives in the ``except ImportError`` branch). Every match of
        the module-local ``_DELTA_RE`` pattern contributes its signed magnitude
        to the matching node; repeated nodes are summed. Text that does not
        match the pattern is silently ignored. No I/O, no side effects.

        In this module the name is called from ``TerpeneEngine._ensure_loaded``
        (terpene ``delta`` strings) and ``TerpeneEngine.compute_strain_effect``
        (entourage ``delta_bonus`` strings).

        Args:
            ds: Delta string such as ``"KOR+0.4 D1-0.3"``.

        Returns:
            Mapping of node name to the summed signed delta.
        """
        totals: Dict[str, float] = {}
        for token in _DELTA_RE.finditer(ds):
            node_name, sign_char, magnitude = token.groups()
            value = float(magnitude)
            signed = value if sign_char == "+" else -value
            totals[node_name] = totals.get(node_name, 0.0) + signed
        return totals

    def resolve_node_name(abbrev: str) -> str:
        """Return ``abbrev`` unchanged (fallback for the missing real resolver).

        Defined only when ``ncm_delta_parser`` is unavailable so the module
        still exposes the ``resolve_node_name`` name. It performs no alias
        lookup at all and is not called anywhere else in this module.

        Args:
            abbrev: Node abbreviation or alias.

        Returns:
            The input string, untouched.
        """
        return abbrev


# ═══════════════════════════════════════════════════════════════════════
# DATA CLASSES
# ═══════════════════════════════════════════════════════════════════════


@dataclass
class TerpeneProfile:
    """One cannabis terpene with its pharmacological and sensory attributes.

    Plain (non-frozen) dataclass record. ``TerpeneEngine._ensure_loaded``
    builds one instance per entry of the ``terpenes`` section of
    ``terpene_profiles.yaml``; ``ncm_deltas`` is the result of running the
    entry's ``delta`` string through ``parse_delta_string``. Instances are
    returned by ``TerpeneEngine.get_terpene`` and read by
    ``TerpeneEngine.compute_strain_effect`` when weighting a strain.
    """

    # Terpene key as it appears in the YAML ``terpenes`` mapping.
    name: str
    # Position on the indica/sativa axis: -1.0 (indica) to +1.0 (sativa).
    polarity: float
    # Volatilization temperature in degrees Celsius.
    boiling_point_c: float
    # Free-text aromatic description.
    aroma: str
    # Parsed NCM deltas: node name -> signed shift.
    ncm_deltas: Dict[str, float]
    # 11D gustatory axis shifts: axis name -> shift.
    flavor_mods: Dict[str, float]
    # Human-readable effect description.
    effects: str
@dataclass
class StrainProfile:
    """One cannabis strain: its gradient position and terpene composition.

    Plain (non-frozen) dataclass record. ``TerpeneEngine._ensure_loaded``
    builds one instance per entry of the ``strains`` section of
    ``terpene_profiles.yaml``. Instances are returned by
    ``TerpeneEngine.get_strain``, ranked by
    ``TerpeneEngine.find_strain_by_gradient`` and read by
    ``TerpeneEngine.compute_strain_effect``.
    """

    # Strain key as it appears in the YAML ``strains`` mapping.
    name: str
    # Position on the indica/sativa axis: 0.0 (indica) to 1.0 (sativa).
    strain_gradient: float
    # Coarse label: indica, sativa, hybrid, etc.
    classification: str
    # Average THC percentage.
    thc_pct: float
    # Human-readable strain description.
    description: str
    # terpene_name -> weight (0.0-1.0).
    terpene_weights: Dict[str, float]
@dataclass
class StrainEffect:
    """Computed pharmacological effect of a single cannabis strain.

    Result record returned by ``TerpeneEngine.compute_strain_effect``. It
    keeps the intermediate pieces (``composite_deltas``,
    ``entourage_bonuses``) next to the merged, saturated ``total_deltas`` so
    callers can inspect how the final numbers were reached. ``to_dict``
    renders a compact view for logging or transport.
    """

    strain_name: str
    strain_gradient: float
    # Weighted sum of every terpene's NCM deltas.
    composite_deltas: Dict[str, float]
    # Deltas contributed by the entourage (synergy) rules that fired.
    entourage_bonuses: Dict[str, float]
    # composite + entourage, merged and saturated.
    total_deltas: Dict[str, float]
    # How the strain shifts each gustatory axis.
    flavor_shifts: Dict[str, float]
    # Name of the highest-weight terpene.
    dominant_terpene: str
    # "sativa" / "indica" / "hybrid".
    pole_label: str
    # Names of the entourage rules that fired.
    active_entourage_rules: List[str]

    def to_dict(self) -> dict:
        """Return a compact, rounded, JSON-friendly view of this effect.

        ``composite_deltas`` and ``entourage_bonuses`` are left out; only the
        merged ``total_deltas`` is emitted (under ``deltas``). The gradient
        and flavor shifts are rounded to 3 places, deltas to 4. The
        ``entourage_rules`` value is the instance's own list, not a copy.

        Returns:
            Dict with keys ``strain``, ``gradient``, ``pole``,
            ``dominant_terpene``, ``deltas``, ``flavor_shifts`` and
            ``entourage_rules``, in that order.
        """
        payload: dict = {}
        payload["strain"] = self.strain_name
        payload["gradient"] = round(self.strain_gradient, 3)
        payload["pole"] = self.pole_label
        payload["dominant_terpene"] = self.dominant_terpene
        payload["deltas"] = {
            node: round(value, 4) for node, value in self.total_deltas.items()
        }
        payload["flavor_shifts"] = {
            axis: round(value, 3) for axis, value in self.flavor_shifts.items()
        }
        payload["entourage_rules"] = self.active_entourage_rules
        return payload
# ═══════════════════════════════════════════════════════════════════════
# TERPENE ENGINE
# ═══════════════════════════════════════════════════════════════════════
class TerpeneEngine:
    """Cannabis phytochemistry computation engine.

    Loads terpene profiles and strains from YAML, computes composite NCM
    effects, resolves entourage synergies, and provides the bipolar
    sativa/indica gradient for cascade interpolation.

    The YAML file is read lazily on first use (see ``_ensure_loaded``). A
    file that cannot be read, or sections that are missing/null, leave the
    corresponding lookup tables empty instead of raising.
    """

    # Gradient cut-offs shared by ``compute_strain_effect`` (pole label) and
    # ``get_cadence_state`` so the two classifications cannot drift apart.
    _SATIVA_THRESHOLD = 0.65
    _INDICA_THRESHOLD = 0.35

    def __init__(self, yaml_path: Optional[str] = None) -> None:
        """Initialize the instance without touching the filesystem.

        Args:
            yaml_path: Path to terpene_profiles.yaml. Defaults to a file of
                that name in the same directory as this module.
        """
        if yaml_path is None:
            yaml_path = os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "terpene_profiles.yaml",
            )
        self._yaml_path = yaml_path
        self._data: Dict[str, Any] = {}
        self._terpenes: Dict[str, TerpeneProfile] = {}
        self._strains: Dict[str, StrainProfile] = {}
        self._entourage_rules: Dict[str, Dict[str, Any]] = {}
        self._poles: Dict[str, Dict[str, Any]] = {}
        self._loaded = False

    # ─── INTERNAL HELPERS ─────────────────────────────────────────────

    @staticmethod
    def _as_mapping(value: Any) -> Dict[Any, Any]:
        """Return ``value`` itself when it is a dict, otherwise an empty dict.

        A YAML key written with no value (``terpenes:``) loads as ``None``,
        and a malformed file can put a list or scalar where a mapping is
        expected; both are treated as "nothing defined".
        """
        return value if isinstance(value, dict) else {}

    @classmethod
    def _pole_label(cls, gradient: float) -> str:
        """Classify a gradient as ``"sativa"``, ``"indica"`` or ``"hybrid"``."""
        if gradient >= cls._SATIVA_THRESHOLD:
            return "sativa"
        if gradient <= cls._INDICA_THRESHOLD:
            return "indica"
        return "hybrid"

    @staticmethod
    def _rule_applies(rule: Dict[str, Any], terpene_weights: Dict[str, float]) -> bool:
        """Tell whether an entourage rule's terpene requirements are met.

        Every terpene listed under ``requires`` must be present in
        ``terpene_weights`` with a weight that is positive and at least
        ``min_weight``. The positivity check matters when ``min_weight`` is
        omitted (defaults to 0.0): without it an absent terpene, whose weight
        reads as 0.0, would satisfy the rule.

        A rule with no ``requires`` entries applies unconditionally.
        NOTE(review): presumably intended for "always on" bonuses; confirm
        against terpene_profiles.yaml.

        Args:
            rule: One entry of the ``entourage_rules`` mapping.
            terpene_weights: The strain's terpene name -> weight mapping.

        Returns:
            True when the rule should fire for this strain.
        """
        required = rule.get("requires") or []
        if isinstance(required, str):
            # A bare string would otherwise be iterated character by character.
            required = [required]
        min_weight = rule.get("min_weight") or 0.0
        for req_terp in required:
            weight = terpene_weights.get(req_terp) or 0.0
            if weight <= 0.0 or weight < min_weight:
                return False
        return True

    def _ensure_loaded(self) -> None:
        """Read and index ``terpene_profiles.yaml`` on first call; no-op afterwards.

        Populates ``self._terpenes`` (each terpene ``delta`` string goes
        through ``parse_delta_string``), ``self._strains``,
        ``self._entourage_rules`` and ``self._poles``, then sets
        ``self._loaded``. A failure to open or parse the file is logged and
        leaves all tables empty. Missing, null or non-mapping sections are
        treated as empty. The tables are built locally and assigned together,
        so an exception from a malformed numeric field (``float()`` still
        raises on junk) cannot leave them half-populated.
        """
        if self._loaded:
            return

        try:
            with open(self._yaml_path, "r", encoding="utf-8") as f:
                raw = yaml.safe_load(f)
        except Exception as e:
            # Deliberate best-effort: the engine stays usable with empty tables.
            logger.error("Failed to load terpene_profiles.yaml: %s", e)
            self._data = {}
            self._loaded = True
            return

        if raw and not isinstance(raw, dict):
            logger.error(
                "terpene_profiles.yaml root is a %s, expected a mapping; ignoring it",
                type(raw).__name__,
            )
        data = self._as_mapping(raw)

        # 🔥 Parse terpene definitions
        terpenes: Dict[str, TerpeneProfile] = {}
        for name, tdata in self._as_mapping(data.get("terpenes")).items():
            tdata = self._as_mapping(tdata)
            terpenes[name] = TerpeneProfile(
                name=name,
                polarity=float(tdata.get("polarity", 0.0)),
                boiling_point_c=float(tdata.get("boiling_point_c", 180)),
                aroma=tdata.get("aroma", ""),
                ncm_deltas=parse_delta_string(tdata.get("delta") or ""),
                flavor_mods=self._as_mapping(tdata.get("flavor_mods")),
                effects=tdata.get("effects", ""),
            )

        # 💀 Parse strain library
        strains: Dict[str, StrainProfile] = {}
        for name, sdata in self._as_mapping(data.get("strains")).items():
            sdata = self._as_mapping(sdata)
            strains[name] = StrainProfile(
                name=name,
                strain_gradient=float(sdata.get("strain_gradient", 0.5)),
                classification=sdata.get("classification", "hybrid"),
                thc_pct=float(sdata.get("thc_pct", 20)),
                description=sdata.get("description", ""),
                terpene_weights=self._as_mapping(sdata.get("terpene_weights")),
            )

        self._data = data
        self._terpenes = terpenes
        self._strains = strains
        # 🌀 Entourage rules and pole definitions are kept as raw mappings.
        self._entourage_rules = self._as_mapping(data.get("entourage_rules"))
        self._poles = self._as_mapping(data.get("poles"))
        self._loaded = True

        logger.info(
            "TerpeneEngine loaded %d terpenes, %d strains, %d entourage rules",
            len(self._terpenes),
            len(self._strains),
            len(self._entourage_rules),
        )

    # ─── PUBLIC API ───────────────────────────────────────────────────

    def get_terpene(self, name: str) -> Optional["TerpeneProfile"]:
        """Look up a terpene profile by name.

        The requested name is upper-cased and spaces become underscores
        before the lookup; the loaded keys themselves are used as they appear
        in the YAML.

        Args:
            name: Terpene name.

        Returns:
            The matching ``TerpeneProfile``, or ``None`` if there is none.
        """
        self._ensure_loaded()
        key = name.upper().replace(" ", "_")
        return self._terpenes.get(key)

    def get_strain(self, name: str) -> Optional["StrainProfile"]:
        """Look up a strain profile by name.

        The requested name is upper-cased and both spaces and hyphens become
        underscores, so ``"Blue Dream"`` and ``"blue-dream"`` map to the same
        key. Used by ``compute_strain_effect``.

        Args:
            name: Strain name.

        Returns:
            The matching ``StrainProfile``, or ``None`` if there is none.
        """
        self._ensure_loaded()
        key = name.upper().replace(" ", "_").replace("-", "_")
        return self._strains.get(key)

    def list_terpenes(self) -> List[str]:
        """Return every loaded terpene name, sorted by its string form.

        Returns:
            Sorted list of terpene names.
        """
        self._ensure_loaded()
        # Same string key as list_strains: YAML may coerce numeric keys to int.
        return sorted(self._terpenes, key=str)

    def list_strains(self) -> List[str]:
        """Return every loaded strain name, sorted by its string form.

        Returns:
            List of strain names sorted by ``str(name)``.
        """
        self._ensure_loaded()
        # YAML may coerce numeric keys to int; sort with a stable string key.
        return sorted(self._strains, key=str)

    def compute_strain_effect(self, strain_name: str) -> Optional["StrainEffect"]:
        """Compute the full pharmacological effect of a cannabis strain.

        Resolves terpene weights to composite NCM deltas, applies entourage
        synergy rules, computes flavor axis shifts, and determines the
        sativa/indica pole label.

        An entourage rule fires only when every terpene it requires is
        actually present in the strain with a positive weight of at least the
        rule's ``min_weight`` (see ``_rule_applies``).

        Args:
            strain_name: Name of the strain to compute.

        Returns:
            StrainEffect dataclass or None if strain not found.
        """
        self._ensure_loaded()

        strain = self.get_strain(strain_name)
        if strain is None:
            logger.warning("Unknown strain '%s'", strain_name)
            return None

        # 😈 Phase 1: Composite terpene NCM deltas (weighted sum)
        composite: Dict[str, float] = {}
        flavor_shifts: Dict[str, float] = {}
        dominant_terpene = ""
        dominant_weight = 0.0

        for terp_name, weight in strain.terpene_weights.items():
            terp = self._terpenes.get(terp_name)
            if terp is None:
                logger.warning(
                    "Strain '%s' references unknown terpene '%s'",
                    strain_name,
                    terp_name,
                )
                continue

            # Track dominant (first terpene wins ties; zero weights never win)
            if weight > dominant_weight:
                dominant_weight = weight
                dominant_terpene = terp_name

            # Weighted NCM deltas
            for node, delta in terp.ncm_deltas.items():
                composite[node] = composite.get(node, 0.0) + delta * weight

            # Weighted flavor axis mods
            for axis, mod in terp.flavor_mods.items():
                flavor_shifts[axis] = flavor_shifts.get(axis, 0.0) + mod * weight

        # 🔥 Phase 2: Entourage synergy rules
        entourage_bonuses: Dict[str, float] = {}
        active_rules: List[str] = []

        for rule_name, rule in self._entourage_rules.items():
            rule = self._as_mapping(rule)
            if not self._rule_applies(rule, strain.terpene_weights):
                continue

            # 💀 Rule fires -- parse and accumulate bonus deltas
            bonus_deltas = parse_delta_string(rule.get("delta_bonus") or "")
            for node, delta in bonus_deltas.items():
                entourage_bonuses[node] = entourage_bonuses.get(node, 0.0) + delta
            active_rules.append(rule_name)

        # 🌀 Phase 3: Merge composite + entourage
        merged: Dict[str, float] = dict(composite)
        for node, delta in entourage_bonuses.items():
            merged[node] = merged.get(node, 0.0) + delta

        # Hill saturation on the magnitude (sign reapplied) prevents runaway
        # sums; the clamp to [-1, 1] is a defensive backstop on top of that.
        total: Dict[str, float] = {}
        for node, value in merged.items():
            sign = 1.0 if value >= 0 else -1.0
            saturated = sign * _hill(abs(value), k_half=0.5, n=2.0)
            total[node] = max(-1.0, min(1.0, saturated))

        return StrainEffect(
            strain_name=strain.name,
            strain_gradient=strain.strain_gradient,
            composite_deltas=composite,
            entourage_bonuses=entourage_bonuses,
            total_deltas=total,
            flavor_shifts=flavor_shifts,
            dominant_terpene=dominant_terpene,
            pole_label=self._pole_label(strain.strain_gradient),
            active_entourage_rules=active_rules,
        )

    def compute_gradient_blend(
        self,
        gradient: float,
    ) -> Dict[str, float]:
        """Interpolate between sativa and indica NCM pole signatures.

        Per the module docstring, the cascade engine uses this to lerp
        ENDOCANNABINOID_DRIFT stage deltas by strain_gradient position.
        A node missing from one pole counts as 0.0 there. Missing or null
        pole definitions are treated as empty signatures.

        Args:
            gradient: 0.0 (pure indica) to 1.0 (pure sativa); values outside
                that range are clamped.

        Returns:
            Dict of NCM node -> delta value, interpolated between poles, with
            keys in sorted order so the output is reproducible across runs.
        """
        self._ensure_loaded()
        gradient = max(0.0, min(1.0, gradient))

        indica_sig = self._as_mapping(
            self._as_mapping(self._poles.get("indica")).get("ncm_signature")
        )
        sativa_sig = self._as_mapping(
            self._as_mapping(self._poles.get("sativa")).get("ncm_signature")
        )

        result: Dict[str, float] = {}
        # Sorting removes the run-to-run ordering noise of raw set iteration.
        for node in sorted(set(indica_sig) | set(sativa_sig), key=str):
            indica_val = float(indica_sig.get(node, 0.0))
            sativa_val = float(sativa_sig.get(node, 0.0))
            # Linear interpolation: gradient=0 -> indica, gradient=1 -> sativa
            result[node] = indica_val + gradient * (sativa_val - indica_val)

        return result

    def get_cadence_state(self, gradient: float) -> str:
        """Return the appropriate cadence state for a strain gradient.

        Uses the same thresholds as the pole label produced by
        ``compute_strain_effect``.

        Args:
            gradient: 0.0 (indica) to 1.0 (sativa).

        Returns:
            Cadence state name: 'stoned_indica', 'stoned', or 'stoned_sativa'.
        """
        pole = self._pole_label(gradient)
        if pole == "hybrid":
            return "stoned"
        return "stoned_" + pole

    def get_pole_info(self, pole: str) -> Dict[str, Any]:
        """Return a shallow copy of the raw definition of one pole.

        Args:
            pole: Pole key as used in the YAML ``poles`` section
                (``compute_gradient_blend`` reads ``"sativa"`` and ``"indica"``).

        Returns:
            A shallow copy of the pole's mapping, or an empty dict when the
            pole is missing or defined as null.
        """
        self._ensure_loaded()
        return dict(self._as_mapping(self._poles.get(pole)))

    def find_strain_by_gradient(
        self,
        target_gradient: float,
        n: int = 3,
    ) -> List["StrainProfile"]:
        """Find the N strains closest to a target gradient value.

        Args:
            target_gradient: Desired sativa/indica position.
            n: Number of results to return; zero or negative yields an empty
                list.

        Returns:
            List of StrainProfile sorted by distance to target (ties keep
            load order).
        """
        self._ensure_loaded()
        ranked = sorted(
            self._strains.values(),
            key=lambda s: abs(s.strain_gradient - target_gradient),
        )
        # Guard the slice: a negative n would otherwise mean "all but the last |n|".
        return ranked[: max(0, n)]
# ═══════════════════════════════════════════════════════════════════════ # UTILITIES # ═══════════════════════════════════════════════════════════════════════ def _hill(x: float, k_half: float = 0.5, n: float = 2.0) -> float: """Apply Hill-equation saturation to a non-negative magnitude. Evaluates the sigmoidal Hill function ``x**n / (k_half**n + x**n)``, mapping an input magnitude into the ``[0, 1)`` range so that accumulated effects level off rather than growing without bound; ``k_half`` sets the half-saturation point and ``n`` the steepness. Inputs at or below zero short-circuit to ``0.0``. Pure math with no side effects. Called by ``TerpeneEngine.compute_strain_effect`` on the absolute value of each merged delta (sign reapplied afterwards) to keep strain effects bounded; a sibling ``_hill`` in ``flavor_engine.py`` plays the same role there. Args: x: Magnitude to saturate; values ``<= 0`` return ``0.0``. k_half: Half-saturation constant (input at which output is 0.5). n: Hill coefficient controlling curve steepness. Returns: The saturated value in ``[0, 1)``. """ if x <= 0: return 0.0 xn = x**n kn = k_half**n return xn / (kn + xn)