Source code for terpene_engine

"""Terpene Engine -- Cannabis phytochemistry modulation layer.

Loads terpene profiles and strain definitions from terpene_profiles.yaml,
computes composite NCM deltas per strain, resolves entourage synergies,
and provides the sativa/indica gradient interpolation used by the
cascade engine for bipolar ENDOCANNABINOID_DRIFT staging.

# 🔥 the stoner goddess gets her pharmacology right 💀
"""

from __future__ import annotations

import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import yaml

logger = logging.getLogger(__name__)

# 💀 Delta parsing -- reuse from ncm_delta_parser if available
try:
    from ncm_delta_parser import parse_delta_string, resolve_node_name
except ImportError:
    import re as _re

    # One token: node name, an explicit "+" or "-", then an unsigned decimal.
    _DELTA_RE = _re.compile(r"([A-Za-z0-9_]+)([+-])(\d+\.?\d*)")

    def parse_delta_string(ds: str) -> Dict[str, float]:
        """Parse a compact ``NODE±VALUE`` delta string into per-node totals.

        Fallback used only when ``ncm_delta_parser`` cannot be imported. Every
        match of ``_DELTA_RE`` in ``ds`` contributes its signed magnitude to the
        entry for its node; repeated nodes are summed. Text that does not match
        the pattern is ignored.

        Args:
            ds: Delta string, e.g. ``"KOR+0.4 D1-0.3"``.

        Returns:
            Dict mapping each node name found to the sum of its signed values.
            Empty when nothing matches.
        """
        totals: Dict[str, float] = {}
        for token in _DELTA_RE.finditer(ds):
            node, sign_char, magnitude = token.groups()
            factor = 1.0 if sign_char == "+" else -1.0
            totals[node] = totals.get(node, 0.0) + factor * float(magnitude)
        return totals

    def resolve_node_name(abbrev: str) -> str:
        """Return ``abbrev`` unchanged (fallback for the real resolver).

        Defined only when ``ncm_delta_parser`` is unavailable so that the name
        ``resolve_node_name`` still exists at module level; it performs no
        alias lookup of its own.

        Args:
            abbrev: Node abbreviation or alias.

        Returns:
            The input string as given.
        """
        return abbrev


# ═══════════════════════════════════════════════════════════════════════
# DATA CLASSES
# ═══════════════════════════════════════════════════════════════════════



[docs]
@dataclass
class TerpeneProfile:
    """Record describing one terpene entry from ``terpene_profiles.yaml``.

    A plain (non-frozen) dataclass: fields can be reassigned after
    construction. ``TerpeneEngine._ensure_loaded`` builds one instance per key
    of the YAML ``terpenes`` section, and ``TerpeneEngine.compute_strain_effect``
    reads ``ncm_deltas`` and ``flavor_mods`` when weighting a strain.
    """

    # Terpene identifier (the key it was loaded under).
    name: str
    # Position on the indica/sativa axis: -1.0 (indica) to +1.0 (sativa).
    polarity: float
    # Volatilization temperature in degrees Celsius.
    boiling_point_c: float
    # Free-text aromatic description.
    aroma: str
    # NCM node -> signed delta, parsed from the YAML ``delta`` string.
    ncm_deltas: Dict[str, float]
    # Gustatory axis -> shift (the 11D flavor axes).
    flavor_mods: Dict[str, float]
    # Human-readable description of the terpene's effects.
    effects: str




[docs]
@dataclass
class StrainProfile:
    """Record describing one strain entry from ``terpene_profiles.yaml``.

    A plain (non-frozen) dataclass. ``TerpeneEngine._ensure_loaded`` builds one
    instance per key of the YAML ``strains`` section; ``get_strain`` returns
    them, ``find_strain_by_gradient`` ranks them by ``strain_gradient``, and
    ``compute_strain_effect`` reads ``terpene_weights`` and ``strain_gradient``.
    """

    # Strain identifier (the key it was loaded under).
    name: str
    # Position on the indica/sativa axis: 0.0 (indica) to 1.0 (sativa).
    strain_gradient: float
    # Coarse label such as indica, sativa, or hybrid.
    classification: str
    # Average THC percentage.
    thc_pct: float
    # Human-readable strain description.
    description: str
    # Terpene name -> weight (0.0-1.0).
    terpene_weights: Dict[str, float]




[docs]
@dataclass
class StrainEffect:
    """Computed effect of one strain, as returned by ``compute_strain_effect``.

    Bundles the strain's name and gradient with the intermediate and final
    results of the computation: the weighted terpene deltas, the bonuses from
    entourage rules that fired, the merged and saturated totals, the flavor
    axis shifts, the dominant terpene, the pole label, and the names of the
    rules that fired. It is a plain (non-frozen) dataclass.
    """

    strain_name: str
    strain_gradient: float
    composite_deltas: Dict[str, float]  # 😈 weighted sum of all terpene NCM deltas
    entourage_bonuses: Dict[str, float]  # 🔥 synergy rule bonuses
    total_deltas: Dict[str, float]  # composite + entourage merged
    flavor_shifts: Dict[str, float]  # 🌀 how the strain modifies gustatory axes
    dominant_terpene: str  # highest-weight terpene
    pole_label: str  # "sativa" / "indica" / "hybrid"
    active_entourage_rules: List[str]  # 💀 which synergy rules fired

    def to_dict(self) -> dict:
        """Render this effect as a compact dict of rounded values.

        ``total_deltas`` values are rounded to 4 places, ``flavor_shifts``
        values and the gradient to 3. ``composite_deltas`` and
        ``entourage_bonuses`` are not included; only the merged totals are.
        Every container in the result is a fresh object, so callers may mutate
        the returned dict without affecting this instance.

        Returns:
            A dict with keys ``strain``, ``gradient``, ``pole``,
            ``dominant_terpene``, ``deltas``, ``flavor_shifts``, and
            ``entourage_rules``.
        """
        return {
            "strain": self.strain_name,
            "gradient": round(self.strain_gradient, 3),
            "pole": self.pole_label,
            "dominant_terpene": self.dominant_terpene,
            "deltas": {k: round(v, 4) for k, v in self.total_deltas.items()},
            "flavor_shifts": {k: round(v, 3) for k, v in self.flavor_shifts.items()},
            # Copy so that edits to the serialized list do not leak back into
            # ``self.active_entourage_rules``.
            "entourage_rules": list(self.active_entourage_rules),
        }




# ═══════════════════════════════════════════════════════════════════════
# TERPENE ENGINE
# ═══════════════════════════════════════════════════════════════════════



[docs]
class TerpeneEngine:
    """Cannabis phytochemistry computation engine.

    Loads terpene profiles and strains from YAML, computes composite
    NCM effects, resolves entourage synergies, and provides the bipolar
    sativa/indica gradient for cascade interpolation.
    """


[docs]
    def __init__(self, yaml_path: Optional[str] = None) -> None:
        """Initialize the instance.

        Args:
            yaml_path: Path to terpene_profiles.yaml. Defaults to
                       same directory as this module.
        """
        if yaml_path is None:
            yaml_path = os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "terpene_profiles.yaml",
            )
        self._yaml_path = yaml_path
        self._data: Dict[str, Any] = {}
        self._terpenes: Dict[str, TerpeneProfile] = {}
        self._strains: Dict[str, StrainProfile] = {}
        self._entourage_rules: Dict[str, Dict[str, Any]] = {}
        self._poles: Dict[str, Dict[str, Any]] = {}
        self._loaded = False


    def _ensure_loaded(self) -> None:
        """Lazily parse ``terpene_profiles.yaml`` into in-memory lookup tables.

        Performs the engine's one-time hydration: on the first call it reads the
        YAML file at ``self._yaml_path`` from disk, then populates ``self._terpenes``
        (running each terpene ``delta`` through ``parse_delta_string``),
        ``self._strains``, ``self._entourage_rules``, and ``self._poles`` before
        flipping the ``self._loaded`` guard so subsequent calls return immediately.
        A read failure is logged via the module ``logger`` and degrades to empty
        tables rather than raising, so the engine stays usable. Touches the
        filesystem (one ``open``) but no Redis, KG, or network.

        Called by every public accessor and compute method on the engine
        (``get_terpene``, ``get_strain``, ``list_terpenes``, ``list_strains``,
        ``compute_strain_effect``, ``compute_gradient_blend``, ``get_pole_info``,
        ``find_strain_by_gradient``) so construction stays cheap and loading is
        deferred until first use.

        Returns:
            None.
        """
        if self._loaded:
            return
        try:
            with open(self._yaml_path, "r", encoding="utf-8") as f:
                self._data = yaml.safe_load(f) or {}
        except Exception as e:
            logger.error("Failed to load terpene_profiles.yaml: %s", e)
            self._data = {}
            self._loaded = True
            return

        # 🔥 Parse terpene definitions
        for name, tdata in self._data.get("terpenes", {}).items():
            deltas = parse_delta_string(tdata.get("delta", ""))
            self._terpenes[name] = TerpeneProfile(
                name=name,
                polarity=float(tdata.get("polarity", 0.0)),
                boiling_point_c=float(tdata.get("boiling_point_c", 180)),
                aroma=tdata.get("aroma", ""),
                ncm_deltas=deltas,
                flavor_mods=tdata.get("flavor_mods", {}),
                effects=tdata.get("effects", ""),
            )

        # 💀 Parse strain library
        for name, sdata in self._data.get("strains", {}).items():
            self._strains[name] = StrainProfile(
                name=name,
                strain_gradient=float(sdata.get("strain_gradient", 0.5)),
                classification=sdata.get("classification", "hybrid"),
                thc_pct=float(sdata.get("thc_pct", 20)),
                description=sdata.get("description", ""),
                terpene_weights=sdata.get("terpene_weights", {}),
            )

        # 🌀 Parse entourage rules
        self._entourage_rules = self._data.get("entourage_rules", {})

        # Parse pole definitions
        self._poles = self._data.get("poles", {})

        self._loaded = True
        logger.info(
            "TerpeneEngine loaded %d terpenes, %d strains, %d entourage rules",
            len(self._terpenes),
            len(self._strains),
            len(self._entourage_rules),
        )

    # ─── PUBLIC API ───────────────────────────────────────────────────


[docs]
    def get_terpene(self, name: str) -> Optional[TerpeneProfile]:
        """Look up a single terpene profile by name, case-insensitively.

        Normalises the requested name to the upper-cased, underscore-joined key
        convention used in the loaded table and returns the matching
        ``TerpeneProfile`` (or ``None`` when absent). Triggers ``_ensure_loaded``
        first so the YAML is hydrated on demand; otherwise it is a pure dict read
        with no side effects. Defined on ``TerpeneEngine``, which is not yet
        imported elsewhere in the repo, so it is reached only via a constructed
        engine instance (the module is referenced by name in the output header that
        ``scrape_leafly.py`` writes, but is not yet imported by other code).

        Args:
            name: Terpene name; spaces are treated as underscores and case is
                ignored.

        Returns:
            The matching ``TerpeneProfile``, or ``None`` if no such terpene.
        """
        self._ensure_loaded()
        key = name.upper().replace(" ", "_")
        return self._terpenes.get(key)



[docs]
    def get_strain(self, name: str) -> Optional[StrainProfile]:
        """Look up a single strain profile by name, case- and separator-insensitively.

        Normalises the requested name by upper-casing and folding both spaces and
        hyphens to underscores (so ``"Blue Dream"`` and ``"blue-dream"`` resolve to
        the same key) and returns the matching ``StrainProfile`` or ``None``. Calls
        ``_ensure_loaded`` to hydrate the YAML on first use; otherwise a pure dict
        read. Used internally by ``compute_strain_effect`` to resolve the strain it
        is asked to compute, and otherwise reached via a constructed ``TerpeneEngine``
        instance since the module has no other importers in the repo yet.

        Args:
            name: Strain name; spaces and hyphens are folded to underscores and
                case is ignored.

        Returns:
            The matching ``StrainProfile``, or ``None`` if no such strain.
        """
        self._ensure_loaded()
        key = name.upper().replace(" ", "_").replace("-", "_")
        return self._strains.get(key)



[docs]
    def list_terpenes(self) -> List[str]:
        """Return every loaded terpene name in sorted order.

        Convenience accessor that hydrates the YAML via ``_ensure_loaded`` and
        returns an alphabetically sorted list of the keys in ``self._terpenes``,
        giving callers a quick catalogue of the available terpenes. Pure read with
        no side effects beyond the lazy load. Reached only through a constructed
        ``TerpeneEngine`` instance, as the module is not imported elsewhere in the
        repo yet.

        Returns:
            Sorted list of terpene names.
        """
        self._ensure_loaded()
        return sorted(self._terpenes.keys())



[docs]
    def list_strains(self) -> List[str]:
        """Return every loaded strain name, sorted by a stable string key.

        Convenience accessor that hydrates the YAML via ``_ensure_loaded`` and
        returns the keys of ``self._strains`` sorted with ``str`` as the sort key,
        guarding against the case where the YAML loader coerces purely numeric
        strain names into ints (which would otherwise be unsortable against
        strings). Pure read apart from the lazy load. Reached only through a
        constructed ``TerpeneEngine`` instance, as nothing else in the repo imports
        the module yet.

        Returns:
            List of strain names sorted by their string form.
        """
        self._ensure_loaded()
        # YAML may coerce numeric keys to int; sort with a stable string key.
        return sorted(self._strains.keys(), key=str)



[docs]
    def compute_strain_effect(self, strain_name: str) -> Optional[StrainEffect]:
        """Compute the full pharmacological effect of a cannabis strain.

        Runs in three phases:

        1. Sum each known terpene's NCM deltas and flavor mods, scaled by the
           terpene's weight in the strain, and track the highest-weight
           terpene. Terpenes the engine does not know are logged and skipped.
        2. Apply every entourage rule whose required terpenes are all actually
           present (weight above zero) at or above the rule's ``min_weight``,
           accumulating the rule's parsed ``delta_bonus``. A rule with no
           ``requires`` entries always applies.
        3. Merge composite deltas and bonuses, pass each magnitude through
           ``_hill`` (sign preserved), and clamp to ``[-1, 1]``.

        The pole label is ``"sativa"`` for a gradient of 0.65 or more,
        ``"indica"`` for 0.35 or less, and ``"hybrid"`` otherwise.

        Args:
            strain_name: Name of the strain to compute; normalized the same
                way as in ``get_strain``.

        Returns:
            StrainEffect dataclass or None if strain not found.
        """
        self._ensure_loaded()
        strain = self.get_strain(strain_name)
        if strain is None:
            logger.warning("Unknown strain '%s'", strain_name)
            return None

        # An empty ``terpene_weights:`` key in the YAML loads as None.
        weights: Dict[str, float] = strain.terpene_weights or {}

        # 😈 Phase 1: Composite terpene NCM deltas (weighted sum)
        composite: Dict[str, float] = {}
        flavor_shifts: Dict[str, float] = {}
        dominant_terpene = ""
        dominant_weight = 0.0

        for terp_name, weight in weights.items():
            terp = self._terpenes.get(terp_name)
            if terp is None:
                logger.warning(
                    "Strain '%s' references unknown terpene '%s'",
                    strain_name,
                    terp_name,
                )
                continue

            # Track dominant
            if weight > dominant_weight:
                dominant_weight = weight
                dominant_terpene = terp_name

            # Weighted NCM deltas
            for node, delta in terp.ncm_deltas.items():
                composite[node] = composite.get(node, 0.0) + delta * weight

            # Weighted flavor axis mods (None when the YAML key is empty)
            for axis, mod in (terp.flavor_mods or {}).items():
                flavor_shifts[axis] = flavor_shifts.get(axis, 0.0) + mod * weight

        # 🔥 Phase 2: Entourage synergy rules
        entourage_bonuses: Dict[str, float] = {}
        active_rules: List[str] = []

        for rule_name, rule in (self._entourage_rules or {}).items():
            rule = rule or {}
            required = rule.get("requires") or []
            if isinstance(required, str):
                # ``requires: MYRCENE`` (scalar) means a single requirement,
                # not one requirement per character.
                required = [required]
            min_weight = rule.get("min_weight") or 0.0

            # A required terpene must actually be in the strain. Comparing
            # only against ``min_weight`` let an absent terpene (weight 0.0)
            # satisfy any rule whose threshold is 0 or unspecified.
            held = (weights.get(req_terp) or 0.0 for req_terp in required)
            if not all(tw > 0.0 and tw >= min_weight for tw in held):
                continue

            # 💀 Rule fires -- parse and accumulate bonus deltas
            bonus_deltas = parse_delta_string(rule.get("delta_bonus") or "")
            for node, delta in bonus_deltas.items():
                entourage_bonuses[node] = entourage_bonuses.get(node, 0.0) + delta
            active_rules.append(rule_name)

        # 🌀 Phase 3: Merge composite + entourage
        merged: Dict[str, float] = dict(composite)
        for node, delta in entourage_bonuses.items():
            merged[node] = merged.get(node, 0.0) + delta

        # Hill saturation keeps summed effects from running away; the clamp
        # then pins the result inside [-1, 1].
        total: Dict[str, float] = {}
        for node, value in merged.items():
            sign = 1.0 if value >= 0 else -1.0
            saturated = sign * _hill(abs(value), k_half=0.5, n=2.0)
            total[node] = max(-1.0, min(1.0, saturated))

        # Determine pole label
        g = strain.strain_gradient
        if g >= 0.65:
            pole_label = "sativa"
        elif g <= 0.35:
            pole_label = "indica"
        else:
            pole_label = "hybrid"

        return StrainEffect(
            strain_name=strain.name,
            strain_gradient=strain.strain_gradient,
            composite_deltas=composite,
            entourage_bonuses=entourage_bonuses,
            total_deltas=total,
            flavor_shifts=flavor_shifts,
            dominant_terpene=dominant_terpene,
            pole_label=pole_label,
            active_entourage_rules=active_rules,
        )



[docs]
    def compute_gradient_blend(
        self,
        gradient: float,
    ) -> Dict[str, float]:
        """Interpolate between sativa and indica NCM pole signatures.

        Used by the cascade engine to lerp ENDOCANNABINOID_DRIFT stage
        deltas based on strain_gradient position.

        Args:
            gradient: 0.0 (pure indica) to 1.0 (pure sativa).

        Returns:
            Dict of NCM node -> delta value, interpolated between poles.
        """
        self._ensure_loaded()
        gradient = max(0.0, min(1.0, gradient))

        indica_sig = self._poles.get("indica", {}).get("ncm_signature", {})
        sativa_sig = self._poles.get("sativa", {}).get("ncm_signature", {})

        # Collect all nodes from both poles
        all_nodes = set(indica_sig.keys()) | set(sativa_sig.keys())
        result: Dict[str, float] = {}

        for node in all_nodes:
            indica_val = float(indica_sig.get(node, 0.0))
            sativa_val = float(sativa_sig.get(node, 0.0))
            # Linear interpolation: gradient=0 -> indica, gradient=1 -> sativa
            result[node] = indica_val + gradient * (sativa_val - indica_val)

        return result



[docs]
    def get_cadence_state(self, gradient: float) -> str:
        """Return the appropriate cadence state for a strain gradient.

        Args:
            gradient: 0.0 (indica) to 1.0 (sativa).

        Returns:
            Cadence state name: 'stoned_indica', 'stoned', or
            'stoned_sativa'.
        """
        if gradient >= 0.65:
            return "stoned_sativa"
        elif gradient <= 0.35:
            return "stoned_indica"
        return "stoned"



[docs]
    def get_pole_info(self, pole: str) -> Dict[str, Any]:
        """Return a copy of the raw pole definition for ``sativa`` or ``indica``.

        Hydrates the YAML via ``_ensure_loaded`` and returns a shallow copy of the
        named pole's entry from ``self._poles`` (typically holding its
        ``ncm_signature`` and related metadata), or an empty dict when the pole is
        absent. The copy keeps callers from mutating the engine's loaded state. The
        same ``ncm_signature`` data is consumed internally by
        ``compute_gradient_blend`` to interpolate between poles. Reached only via a
        constructed ``TerpeneEngine`` instance, as the module has no other importers
        in the repo yet.

        Args:
            pole: Pole key, normally ``"sativa"`` or ``"indica"``.

        Returns:
            A shallow copy of the pole definition dict, or an empty dict.
        """
        self._ensure_loaded()
        return dict(self._poles.get(pole, {}))



[docs]
    def find_strain_by_gradient(
        self,
        target_gradient: float,
        n: int = 3,
    ) -> List[StrainProfile]:
        """Find the N strains closest to a target gradient value.

        Args:
            target_gradient: Desired sativa/indica position.
            n: Number of results to return.

        Returns:
            List of StrainProfile sorted by distance to target.
        """
        self._ensure_loaded()
        all_strains = list(self._strains.values())
        all_strains.sort(
            key=lambda s: abs(s.strain_gradient - target_gradient),
        )
        return all_strains[:n]




# ═══════════════════════════════════════════════════════════════════════
# UTILITIES
# ═══════════════════════════════════════════════════════════════════════


def _hill(x: float, k_half: float = 0.5, n: float = 2.0) -> float:
    """Saturate a magnitude with the Hill function ``x**n / (k_half**n + x**n)``.

    Inputs at or below zero return ``0.0`` without evaluating the formula.
    For positive ``x`` the output equals 0.5 when ``x == k_half`` and moves
    toward 1 as ``x`` grows. ``TerpeneEngine.compute_strain_effect`` calls
    this on the absolute value of each merged delta and reapplies the sign
    afterwards.

    Args:
        x: Magnitude to saturate; values ``<= 0`` return ``0.0``.
        k_half: Half-saturation constant (input at which output is 0.5).
        n: Hill coefficient controlling curve steepness.

    Returns:
        The saturated value.
    """
    if x <= 0:
        return 0.0
    powered_input = pow(x, n)
    powered_half = pow(k_half, n)
    return powered_input / (powered_half + powered_input)