"""Cadence Post-Processor -- code-level text seasoning for altered states.
When the NCM vector exceeds normal range and a cadence profile fires
(barred_out, tweaking, nodding, etc.), this module lightly adjusts
Star's clean LLM output to reinforce cadence effects that LLMs underdo
(subtle typos, spacing slips, mild vowel stretch, etc.).
The main LLM already gets the cadence directive + voice sample in its
system prompt -- that handles semantic stuff (vocabulary, tone, stumbling
thoughts). This module adds a thin mechanical layer on top; it is tuned
to stay readable and preserve markdown/code structure.
# she breaks her own words, feral style — but the room stays standing
"""
from __future__ import annotations
import logging
import random
import re
from typing import Any, Dict
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Nearby keys on a QWERTY layout, used for realistic fat-finger typos.
# Maps a lowercase letter to a string of candidate replacement letters;
# `_apply_typos` picks one of them at random and restores upper case itself.
_NEARBY_KEYS: Dict[str, str] = {
    "a": "sqwz",
    "b": "vngh",
    "c": "xdfv",
    "d": "sfce",
    "e": "wrd",
    "f": "dgcv",
    "g": "fhtb",
    "h": "gjyn",
    "i": "ujko",
    "j": "hknu",
    "k": "jlmi",
    "l": "kop",
    "m": "njk",
    "n": "bmhj",
    "o": "iplk",
    "p": "ol",
    "q": "wa",
    "r": "etf",
    "s": "adwz",
    "t": "rgy",
    "u": "yji",
    "v": "cfgb",
    "w": "qeas",
    "x": "zsdc",
    "y": "tuh",
    "z": "xas",
}
# ASCII vowels and consonants in both cases; membership sets for
# `_drop_vowels` and `_drop_consonants`. Note that "y" counts as a consonant.
_VOWELS = set("aeiouAEIOU")
_CONSONANTS = set("bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ")
# Symbols that can randomly replace chars at high degradation (leetspeak
# look-alikes). Keys are lowercase; `_apply_typos` lowercases before lookup.
_SYMBOL_SUBS: Dict[str, str] = {
    "a": "@",
    "s": "$",
    "e": "3",
    "i": "1",
    "o": "0",
    "l": "|",
    "t": "+",
    "n": "~",
}
# Stretchable vowel runs: one vowel followed by up to two repeats of the same
# vowel (case-insensitive). Group 1 is the first vowel of the run.
_STRETCHABLE = re.compile(r"([aeiou])\1{0,2}", re.IGNORECASE)
# Characters that usually mean "markdown / markup boundary" — skip typos and
# mid-word breaks beside them so lists, emphasis, and links stay intact.
_MD_EDGE_CHARS = frozenset("*_[]()#`<>|\\")
# Regex for splitting body text: leave code, links, URLs, and thought XML alone.
# Alternatives, in order: fenced code block, inline code span, markdown link
# `[text](target)`, bare http(s) URL, and a reasoning-style XML element.
# The opening and closing tag names are matched independently, so the pattern
# does not require them to be the same name.
_CADENCE_PROTECTED_SEGMENTS = re.compile(
    r"("
    r"```[\s\S]*?```"
    r"|`[^`\n]*`"
    r"|\[[^\]]*\]\([^)]*\)"
    r"|https?://[^\s<>\)]+"
    r"|<(?:thinking|thought|think|prompt_refinement|internal_reasoning"
    r"|self_reflection|chain_of_thought|reasoning|scratchpad)>"
    r".*?"
    r"</(?:thinking|thought|think|prompt_refinement|internal_reasoning"
    r"|self_reflection|chain_of_thought|reasoning|scratchpad)>"
    r")",
    re.DOTALL | re.IGNORECASE,
)
def _char_touching_markup(chars: list[str], i: int) -> bool:
    """Tell whether position *i* is, or directly neighbours, a markup character.

    Character-level transforms use this to avoid corrupting structure: a typo
    or swap next to ``*``, ``_``, ``[``, a backtick, etc. could mangle
    emphasis, links, lists, or code. The check is purely horizontal: it looks
    at ``chars[i]`` and its immediate left and right neighbours and compares
    them against ``_MD_EDGE_CHARS``.

    Args:
        chars: The text exploded into a list of single characters.
        i: Index into *chars* to test.

    Returns:
        False when *i* is out of range; otherwise True if ``chars[i]`` or one
        of its direct neighbours is a markup edge character.
    """
    if not 0 <= i < len(chars):
        return False
    # Window of the character plus its neighbours, clipped at the list ends.
    window = chars[max(i - 1, 0) : i + 2]
    return any(ch in _MD_EDGE_CHARS for ch in window)
def _token_is_prose_only(token: str) -> bool:
    """Decide whether a whitespace-delimited token may be degraded as prose.

    A heuristic filter that keeps machine-readable text intact. A token is
    rejected when it is empty, contains a URL marker (``http://``,
    ``https://``, ``www.``, case-insensitive), contains a forward or back
    slash, contains any ``_MD_EDGE_CHARS`` character, or is a ``#``-prefixed
    heading/tag.

    Args:
        token: A single token from the body text.

    Returns:
        True only when none of the rejection rules apply.
    """
    if not token:
        return False
    lowered = token.lower()
    looks_like_url = any(
        marker in lowered for marker in ("http://", "https://", "www.")
    )
    has_path_separator = "/" in token or "\\" in token
    if looks_like_url or has_path_separator:
        return False
    if not _MD_EDGE_CHARS.isdisjoint(token):
        return False
    # NOTE(review): redundant while "#" is a member of _MD_EDGE_CHARS; kept so
    # heading markers stay protected if that set ever changes.
    return not (token.startswith("#") and len(token) > 1)
# Psychedelic emojis for linebreak injection; the default sigil pool used by
# `_psychedelic_emoji_linebreaks` when the caller supplies none.
_PSYCHEDELIC_EMOJIS = [
    "\U0001f308",  # rainbow
    "\U0001f441",  # eye
    "\u2728",  # sparkles
    "\U0001f344",  # mushroom
    "\U0001f300",  # cyclone/spiral
    "\U0001f30c",  # milky way
    "\U0001f31f",  # glowing star
    "\U0001f30a",  # wave
]
# Simple pluralization rules: lowercase singular -> irregular plural.
# `_pluralize_word` consults this before its suffix rules.
_PLURAL_IRREGULARS: Dict[str, str] = {
    "man": "men",
    "woman": "women",
    "child": "children",
    "foot": "feet",
    "tooth": "teeth",
    "mouse": "mice",
    "goose": "geese",
    "person": "people",
    "self": "selves",
}
# =====================================================================
# Transform functions
# =====================================================================
def _apply_typos(text: str, rate: float) -> str:
    """Inject character-level typos into *text*.

    Walks the text one character at a time. Each alphabetic character is
    selected with probability *rate*; a selected character that sits on or
    beside a markdown boundary (``_char_touching_markup``) is left alone.
    Otherwise a second roll picks the corruption:

    * below 0.32, when the next character is also a letter: swap the two,
      unless the neighbour touches markup, in which case nothing changes;
    * below 0.77: replace with a random ``_NEARBY_KEYS`` neighbour, keeping
      the case (this band also catches a failed swap roll);
    * below 0.90: double the character;
    * below 0.97: drop the character;
    * otherwise: with a further 35% chance, replace with its
      ``_SYMBOL_SUBS`` look-alike when one exists.

    Non-alphabetic characters (whitespace, digits, punctuation) are never
    altered. Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to corrupt.
        rate: Per-letter selection probability; values <= 0 disable the
            transform.

    Returns:
        The text with typos injected, or *text* itself when *rate* <= 0.
    """
    if rate <= 0:
        return text

    source = list(text)
    total = len(source)
    out: list[str] = []
    pos = 0
    while pos < total:
        ch = source[pos]
        emit = ch  # what this step contributes to the output
        step = 1  # how many input characters this step consumes

        # Order matters: the random roll happens only for letters, and the
        # markup check only for letters that won the roll.
        selected = (
            ch.isalpha()
            and random.random() <= rate
            and not _char_touching_markup(source, pos)
        )
        if selected:
            roll = random.random()
            nxt = pos + 1
            if roll < 0.32 and nxt < total and source[nxt].isalpha():
                # Adjacent swap, skipped when the neighbour touches markup.
                if not _char_touching_markup(source, nxt):
                    emit = source[nxt] + ch
                    step = 2
            elif roll < 0.77:
                # Fat-finger substitution with a neighbouring key.
                neighbours = _NEARBY_KEYS.get(ch.lower())
                if neighbours is not None:
                    slip = random.choice(neighbours)
                    emit = slip if ch.islower() else slip.upper()
            elif roll < 0.90:
                # Single-character stutter.
                emit = ch * 2
            elif roll < 0.97:
                # Dropped character (harsh, so kept rare).
                emit = ""
            else:
                # Symbol look-alike; rarest outcome.
                glyph = _SYMBOL_SUBS.get(ch.lower())
                if glyph is not None and random.random() < 0.35:
                    emit = glyph

        out.append(emit)
        pos += step
    return "".join(out)
def _drop_vowels(text: str, rate: float) -> str:
    """Randomly delete vowels from *text*.

    Every character found in ``_VOWELS`` is removed independently with
    probability *rate*; all other characters always survive, so spacing and
    punctuation are preserved. Consumes the module-level ``random`` generator
    (one roll per vowel).

    Args:
        text: The prose segment to thin.
        rate: Per-vowel drop probability; values <= 0 disable the transform.

    Returns:
        The thinned text, or *text* itself when *rate* <= 0.
    """
    if rate <= 0:
        return text
    kept = []
    for ch in text:
        # A vowel is dropped when its roll does not exceed the rate.
        if ch in _VOWELS and random.random() <= rate:
            continue
        kept.append(ch)
    return "".join(kept)
def _drop_consonants(text: str, rate: float) -> str:
    """Randomly delete consonants from *text*.

    The counterpart of ``_drop_vowels``: every character found in
    ``_CONSONANTS`` is removed independently with probability *rate*, and
    everything else survives. Consumes the module-level ``random`` generator
    (one roll per consonant).

    Args:
        text: The prose segment to thin.
        rate: Per-consonant drop probability; values <= 0 disable the
            transform.

    Returns:
        The thinned text, or *text* itself when *rate* <= 0.
    """
    if rate <= 0:
        return text
    kept = []
    for ch in text:
        # A consonant is dropped when its roll does not exceed the rate.
        if ch in _CONSONANTS and random.random() <= rate:
            continue
        kept.append(ch)
    return "".join(kept)
def _inject_spaces(text: str, rate: float) -> str:
    """Insert random spaces mid-word to fracture long words.

    The text is split into whitespace-delimited tokens (the whitespace runs
    themselves are kept verbatim). A token is only eligible when the WHOLE
    token passes ``_token_is_prose_only``, so paths, URLs, identifiers, and
    markdown fragments are left intact. Inside an eligible token, a break may
    be inserted after a letter once at least five consecutive letters have
    been emitted since the last break or non-letter, and only when the next
    character is also a letter, so a break never lands at the start or end
    of a word. Each such position breaks with probability *rate*.

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to fracture.
        rate: Per-position break probability; values <= 0 disable the
            transform.

    Returns:
        The text with mid-word spaces inserted, or *text* itself when
        *rate* <= 0.
    """
    if rate <= 0:
        return text

    pieces: list[str] = []
    # The capturing group keeps the whitespace runs, so spacing round-trips.
    for chunk in re.split(r"(\s+)", text):
        # Guard the whole token: the alphabetic fragment alone can never
        # contain a slash or markup character, so checking it protects nothing.
        if not chunk or chunk.isspace() or not _token_is_prose_only(chunk):
            pieces.append(chunk)
            continue

        last = len(chunk) - 1
        run = 0  # consecutive letters since the last break / non-letter
        for idx, ch in enumerate(chunk):
            pieces.append(ch)
            if not ch.isalpha():
                run = 0
                continue
            run += 1
            mid_word = idx < last and chunk[idx + 1].isalpha()
            if run > 4 and mid_word and random.random() < rate:
                pieces.append(" ")
                run = 0
    return "".join(pieces)
def _inject_linebreaks(text: str, rate: float) -> str:
    """Insert random newlines mid-word for the drunken-linebreak effect.

    The newline sibling of ``_inject_spaces``, deliberately rarer: a break
    needs at least seven consecutive letters and fires with probability
    ``rate * 0.55``. The text is split into whitespace-delimited tokens
    (whitespace kept verbatim) and a token is only eligible when the WHOLE
    token passes ``_token_is_prose_only``, so paths, URLs, identifiers, and
    markdown fragments stay on one line. A break is only placed between two
    letters, never after the last letter of a word.

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to break across lines.
        rate: Base per-position break probability (scaled by 0.55);
            values <= 0 disable the transform.

    Returns:
        The text with mid-word newlines inserted, or *text* itself when
        *rate* <= 0.
    """
    if rate <= 0:
        return text

    break_prob = rate * 0.55
    pieces: list[str] = []
    # The capturing group keeps the whitespace runs, so spacing round-trips.
    for chunk in re.split(r"(\s+)", text):
        # Guard the whole token: the alphabetic fragment alone can never
        # contain a slash or markup character, so checking it protects nothing.
        if not chunk or chunk.isspace() or not _token_is_prose_only(chunk):
            pieces.append(chunk)
            continue

        last = len(chunk) - 1
        run = 0  # consecutive letters since the last break / non-letter
        for idx, ch in enumerate(chunk):
            pieces.append(ch)
            if not ch.isalpha():
                run = 0
                continue
            run += 1
            mid_word = idx < last and chunk[idx + 1].isalpha()
            if run > 6 and mid_word and random.random() < break_prob:
                pieces.append("\n")
                run = 0
    return "".join(pieces)
def _mangle_caps(text: str, mode: str) -> str:
    """Apply capitalization mangling according to *mode*.

    Modes:
        - ``'lower'`` (alias ``'gone'``): force everything to lowercase.
        - ``'random'``: letters are lowercased except inside random bursts
          of capitals. Outside a burst each letter has a 3.5% chance of
          starting one; inside a burst each letter has a 22% chance of
          ending it (the ending letter is already lowercase).
        - ``'shout'``: each space-separated word is upper-cased with an 8%
          chance.
        - anything else (e.g. ``'none'``): the text is returned unchanged.

    The ``'random'`` and ``'shout'`` modes consume the module-level
    ``random`` generator.

    Args:
        text: The text to restyle.
        mode: One of the mode names above.

    Returns:
        The restyled text.
    """
    if mode in ("lower", "gone"):
        return text.lower()

    if mode == "shout":
        return " ".join(
            token.upper() if random.random() < 0.08 else token
            for token in text.split(" ")
        )

    if mode != "random":
        return text

    pieces = []
    bursting = False
    for ch in text:
        if not ch.isalpha():
            pieces.append(ch)
            continue
        # Exactly one roll per letter: either to end or to start a burst.
        if bursting:
            if random.random() < 0.22:
                bursting = False
        elif random.random() < 0.035:
            bursting = True
        pieces.append(ch.upper() if bursting else ch.lower())
    return "".join(pieces)
def _stretch_vowels(text: str, intensity: float) -> str:
    """Lengthen scattered vowel runs for a casual drawl ('so' -> 'sooo').

    Scans the text for ``_STRETCHABLE`` vowel runs and replaces a small
    fraction of them with a repeated vowel. The fraction is
    ``0.62 * intensity`` clamped to the range 0.028-0.18, and each stretched
    run becomes its first vowel repeated 2-4 times (1-3 extra characters,
    with the upper bound growing with *intensity*).

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to drawl.
        intensity: Controls both how many runs are touched and how far each
            can stretch; values <= 0 disable the transform.

    Returns:
        The text with some vowel runs lengthened, or *text* itself when
        *intensity* <= 0.
    """
    if intensity <= 0:
        return text

    # Fraction of stretchable vowel runs to touch, bounded so it stays subtle.
    touch_prob = min(0.18, max(0.028, 0.62 * float(intensity)))

    def _stretch(match: re.Match) -> str:
        """Return group 1 of *match* repeated ``1 + extra`` times.

        ``extra`` is drawn from 1 up to a cap of at most 3 that grows with
        the enclosing *intensity*, keeping stretched words short.
        """
        cap = max(1, min(3, int(intensity * 2.5) + 1))
        return match.group(1) * (1 + random.randint(1, cap))

    def _maybe_stretch(match: re.Match) -> str:
        """Stretch the run with probability ``touch_prob``, else keep it."""
        if random.random() < touch_prob:
            return _stretch(match)
        return match.group()

    return _STRETCHABLE.sub(_maybe_stretch, text)
def _stretch_sentence_final_vowels(text: str, intensity: float) -> str:
    """Massively stretch a vowel in some sentence-ending words.

    Every word that is directly followed by sentence punctuation
    (``.``/``!``/``?``) and then whitespace or end-of-text is selected with
    probability ``0.045 * intensity``. In a selected word the LAST vowel
    cluster is replaced by 4-11 copies of that cluster's first vowel, e.g.
    ``'gone.'`` -> ``'goneeeeee.'`` and ``'mood.'`` -> ``'mooooood.'``.
    Words without a vowel are left alone.

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to process.
        intensity: Scales the per-sentence-end probability; values <= 0
            disable the transform.

    Returns:
        The text with some sentence-final words stretched, or *text* itself
        when *intensity* <= 0.
    """
    if intensity <= 0:
        return text

    def _mega_stretch(match: re.Match) -> str:
        """Stretch the last vowel cluster of the matched word.

        Group 1 of *match* is the word; group 2 is the punctuation plus the
        trailing whitespace (if any), which is appended unchanged.
        """
        word, punct = match.group(1), match.group(2)
        final_cluster = None
        for final_cluster in re.finditer(r"([aeiou]+)", word, re.IGNORECASE):
            pass  # keep only the last match
        if final_cluster is None:
            return match.group(0)
        head = word[: final_cluster.start()]
        tail = word[final_cluster.end() :]
        vowel = final_cluster.group(1)[0]  # first vowel of the cluster
        return head + vowel * random.randint(4, 11) + tail + punct

    threshold = 0.045 * intensity

    def _maybe_stretch(match: re.Match) -> str:
        """Apply ``_mega_stretch`` with probability ``threshold``."""
        if random.random() < threshold:
            return _mega_stretch(match)
        return match.group(0)

    # Match word + sentence-ending punctuation.
    return re.sub(r"(\S+)([.!?]+(?:\s|$))", _maybe_stretch, text)
def _add_trailing_ellipses(text: str, rate: float) -> str:
    """Replace sentence-ending punctuation with trailing ellipses.

    Every run of ``.``/``!``/``?`` that is followed by whitespace or
    end-of-text is swapped for ``...`` with probability *rate*; the following
    whitespace is preserved. The whole run is replaced, so an existing
    ellipsis stays ``...`` (rather than growing to ``.....``) and ``?!``
    becomes ``...`` (rather than ``?...``).

    Consumes the module-level ``random`` generator (one roll per run).

    Args:
        text: The prose segment to soften.
        rate: Per-sentence-end replacement probability; values <= 0 disable
            the transform.

    Returns:
        The text with some sentence-enders replaced by ellipses, or *text*
        itself when *rate* <= 0.
    """
    if rate <= 0:
        return text

    def _soften(match: re.Match) -> str:
        """Swap the punctuation run (group 1) for an ellipsis, by chance."""
        if random.random() < rate:
            return "..." + match.group(2)
        return match.group(0)

    return re.sub(r"([.!?]+)(\s|$)", _soften, text)
def _truncate_sentences(text: str, coherence: float) -> str:
    """Apply a word-doubling stutter to sentences based on *coherence*.

    Despite the name nothing is truncated: instead of cutting a sentence off
    (which eats content), one word near the "losing the thread" point,
    40-80% of the way through the sentence, is doubled in place.

    Each sentence is stuttered with probability ``1 - coherence``
    (coherence 0.2 -> 80% chance); sentences with fewer than three
    space-separated words are never changed. The whitespace between
    sentences, including newlines and blank lines, is preserved exactly.

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to process.
        coherence: 1.0 means fully coherent; values >= 0.95 disable the
            transform.

    Returns:
        The text with some words doubled, or *text* itself when
        *coherence* >= 0.95.
    """
    if coherence >= 0.95:
        return text

    # Capturing split: even indices hold sentences, odd indices hold the
    # original whitespace between them, so "".join() round-trips the layout.
    parts = re.split(r"((?<=[.!?])\s+)", text)
    for pos in range(0, len(parts), 2):
        if random.random() <= coherence:
            continue
        words = parts[pos].split(" ")
        if len(words) < 3:
            continue
        # Pick a word 40-80% through the sentence, never the first one.
        ratio = random.uniform(0.4, 0.8)
        idx = min(max(1, int(len(words) * ratio)), len(words) - 1)
        # Repeat the word at the stutter point (doubles it in place).
        words.insert(idx, words[idx])
        parts[pos] = " ".join(words)
    return "".join(parts)
def _add_mid_punctuation(text: str, rate: float) -> str:
    """Scatter stray periods between words (the k-hole effect).

    The text is processed line by line. Each non-blank line is selected with
    probability *rate*; in a selected line, every space-separated word has a
    9% chance of being followed by a standalone ``"."`` token, provided the
    word is non-blank and passes ``_token_is_prose_only``.

    NOTE(review): the period is inserted as its own space-separated token
    (``'spaces . in words'``), not attached to the preceding word. Confirm
    whether an attached period was intended.

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to process.
        rate: Per-line selection probability; values <= 0 disable the
            transform.

    Returns:
        The text with stray periods inserted, or *text* itself when
        *rate* <= 0.
    """
    if rate <= 0:
        return text

    out_lines = []
    for line in text.split("\n"):
        # Blank lines never roll; other lines are processed when the roll
        # does not exceed the rate.
        if line.strip() and random.random() <= rate:
            tokens = []
            for tok in line.split(" "):
                tokens.append(tok)
                if random.random() < 0.09 and tok.strip() and _token_is_prose_only(tok):
                    tokens.append(".")
            line = " ".join(tokens)
        out_lines.append(line)
    return "\n".join(out_lines)
def _shuffle_words(text: str, rate: float) -> str:
    """Shuffle a window of 3-6 consecutive words inside some sentences.

    Each sentence with at least four words is selected with probability
    *rate*; in a selected sentence one randomly placed window of 3-6
    space-separated words is shuffled in place. The whitespace between
    sentences, including newlines and blank lines, is preserved exactly.

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to process.
        rate: Per-sentence selection probability; values <= 0 disable the
            transform.

    Returns:
        The text with some word windows shuffled, or *text* itself when
        *rate* <= 0.
    """
    if rate <= 0:
        return text

    # Capturing split: even indices hold sentences, odd indices hold the
    # original whitespace between them, so "".join() round-trips the layout.
    parts = re.split(r"((?<=[.!?])\s+)", text)
    for pos in range(0, len(parts), 2):
        sentence = parts[pos]
        if random.random() > rate or len(sentence.split()) < 4:
            continue
        words = sentence.split(" ")
        # Pick the window size, then a random starting point that fits.
        size = random.randint(3, min(6, len(words)))
        start = random.randint(0, max(0, len(words) - size))
        window = words[start : start + size]
        random.shuffle(window)
        words[start : start + size] = window
        parts[pos] = " ".join(words)
    return "".join(parts)
# Verb tense scrambling tables for word salad: lowercase verb -> candidate
# replacement phrases, one of which `_word_salad` picks at random.
# NOTE(review): `_word_salad` looks up single space-separated words, so the
# two-word key "i am" looks unreachable from that caller; confirm.
_TENSE_SCRAMBLE: Dict[str, list] = {
    "i'm": ["we am", "am been", "was be"],
    "i am": ["we am", "am been", "is be"],
    "is": ["are was", "been is", "were"],
    "are": ["is was", "been were", "am"],
    "was": ["is been", "were am", "are"],
    "were": ["was is", "been am", "are"],
    "have": ["has did", "had does", "having"],
    "has": ["have did", "had been", "having"],
    "do": ["did does", "done doing", "does did"],
    "does": ["did do", "done did", "doing"],
    "did": ["do done", "does doing", "done"],
    "go": ["gone went", "going goes", "went"],
    "going": ["gone go", "went goes", "go"],
    "tell": ["told telling", "tells told"],
    "telling": ["told tell", "tells told"],
    "see": ["seen saw", "seeing sees"],
    "know": ["known knew", "knowing knows"],
    "think": ["thought thinks", "thinking"],
    "want": ["wanted wants", "wanting"],
    "need": ["needed needs", "needing"],
    "feel": ["felt feels", "feeling"],
    "say": ["said says", "saying"],
    "make": ["made makes", "making"],
    "take": ["took takes", "taking"],
    "come": ["came comes", "coming"],
    "get": ["got gets", "getting"],
}
# Pronouns that `_word_salad` may inject between words.
_RANDOM_PRONOUNS = ["we", "they", "it", "you", "them", "us", "he", "she"]
def _word_salad(text: str, rate: float) -> str:
    """Apply word salad to sentences (psychosis speech disorder).

    Each sentence with at least four space-separated words is selected with
    probability *rate*. A selected sentence receives one or two operations,
    each chosen by a fresh roll:

    - MERGE (roll < 0.30): fuse two adjacent words, reversed and lowercased,
      with edge punctuation stripped ('hair dryer' -> 'dryerhair').
    - SWAP FRAGMENTS (roll < 0.55): reverse a run of up to three words and
      move a leading slice of the new first word onto the front of the
      second.
    - TENSE SCRAMBLE (roll < 0.80): replace the first word found in
      ``_TENSE_SCRAMBLE`` with one of its scrambled forms.
    - PRONOUN INJECTION (otherwise): insert a random pronoun.

    An operation whose size precondition fails falls through to the next
    band. The whitespace between sentences, including newlines and blank
    lines, is preserved exactly.

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to process.
        rate: Per-sentence selection probability; values <= 0 disable the
            transform.

    Returns:
        The scrambled text, or *text* itself when *rate* <= 0.
    """
    if rate <= 0:
        return text

    edge_punct = ".,!?;:\"'"
    # Capturing split: even indices hold sentences, odd indices hold the
    # original whitespace between them, so "".join() round-trips the layout.
    parts = re.split(r"((?<=[.!?])\s+)", text)
    for pos in range(0, len(parts), 2):
        words = parts[pos].split(" ")
        if random.random() > rate or len(words) < 4:
            continue
        # Apply 1-2 salad operations per triggered sentence.
        for _ in range(random.randint(1, 2)):
            op = random.random()
            if op < 0.30 and len(words) >= 3:
                # MERGE: fuse two adjacent words, reversed.
                idx = random.randint(0, len(words) - 2)
                w1 = words[idx].strip(edge_punct)
                w2 = words[idx + 1].strip(edge_punct)
                if len(w1) >= 2 and len(w2) >= 2:
                    words[idx] = w2.lower() + w1.lower()
                    words.pop(idx + 1)
            elif op < 0.55 and len(words) >= 4:
                # SWAP FRAGMENTS: take 2-3 words, reverse and mangle.
                idx = random.randint(0, max(0, len(words) - 3))
                chunk = min(3, len(words) - idx)
                fragment = words[idx : idx + chunk]
                fragment.reverse()
                # Move a leading slice of the first word onto the second.
                if len(fragment) >= 2 and len(fragment[0]) > 3:
                    cut = random.randint(2, len(fragment[0]) - 1)
                    fragment[1] = fragment[0][:cut] + fragment[1]
                    fragment[0] = fragment[0][cut:]
                words[idx : idx + chunk] = fragment
            elif op < 0.80:
                # TENSE SCRAMBLE: scramble the first known verb.
                for j, w in enumerate(words):
                    clean = w.lower().strip(edge_punct)
                    if clean in _TENSE_SCRAMBLE:
                        words[j] = random.choice(_TENSE_SCRAMBLE[clean])
                        break
            elif len(words) >= 3:
                # PRONOUN INJECTION: insert a random pronoun (never first).
                idx = random.randint(1, len(words) - 1)
                words.insert(idx, random.choice(_RANDOM_PRONOUNS))
        parts[pos] = " ".join(words)
    return "".join(parts)
def _pluralize_word(word: str) -> str:
    """Naively pluralize a single English word, preserving punctuation.

    The token is split into leading punctuation, an inner core, and trailing
    punctuation (closing brackets included). The core is pluralized by a
    small rule cascade: the ``_PLURAL_IRREGULARS`` table (capitalised when
    the core is), a pass-through for cores that already end in ``s``, then
    ``-es`` after ``sh``/``ch``/``x``/``z``, consonant-``y`` -> ``-ies``,
    and a default ``-s``. The original punctuation is then reattached.

    A token with no core at all (empty, or punctuation only) is returned
    unchanged instead of gaining a stray ``s``.

    Args:
        word: A single token, possibly wrapped in punctuation.

    Returns:
        The pluralized token, or *word* unchanged when it has no core or
        already looks plural.
    """
    # Peel trailing, then leading, punctuation off the alphabetic core.
    stem = word.rstrip(".,!?;:\"'*_~`)]}")
    suffix = word[len(stem) :]
    core = stem.lstrip("\"'*_~`([{")
    prefix = stem[: len(stem) - len(core)]
    if not core:
        return word

    lower = core.lower()

    # Irregulars first, matching the capitalisation of the input.
    irregular = _PLURAL_IRREGULARS.get(lower)
    if irregular is not None:
        if core[0].isupper():
            irregular = irregular[0].upper() + irregular[1:]
        return prefix + irregular + suffix

    # Already plural-looking: leave the token exactly as it came in.
    if lower.endswith("s"):
        return word

    # Standard suffix rules.
    if lower.endswith(("sh", "ch", "x", "z")):
        plural = core + "es"
    elif lower.endswith("y") and len(lower) > 1 and lower[-2] not in "aeiou":
        plural = core[:-1] + "ies"
    else:
        plural = core + "s"
    return prefix + plural + suffix
def _pluralize_nouns(text: str, rate: float) -> str:
    """Randomly pluralize likely nouns in *text*.

    The text is split on single spaces. A token is a candidate when, after
    stripping edge punctuation and any trailing ``s``, its core is at least
    three characters long and purely alphabetic (so numbers, paths, file
    names, URLs, and hyphenated or apostrophised words are skipped), it is
    not in the built-in stop list of common function words and verbs, and it
    does not end in ``-ing``, ``-ly``, or ``-ed``. Each candidate is passed
    to ``_pluralize_word`` with probability *rate*.

    Consumes the module-level ``random`` generator (one roll per candidate).

    Args:
        text: The prose segment to process.
        rate: Per-candidate pluralization probability; values <= 0 disable
            the transform.

    Returns:
        The text with some words pluralized, or *text* itself when
        *rate* <= 0.
    """
    if rate <= 0:
        return text

    # Common words to NOT pluralize (verbs, adjectives, prepositions, etc.).
    skip = frozenset(
        "the a an is am are was were be been being have has had do does did "
        "will would could should may might shall can this that these those "
        "my your his her its our their i you he she it we they me him us "
        "them not no yes and or but if then than so very just also too more "
        "most with from into onto upon about like for at on in to of by up "
        "down out off over under now here there when where how what who "
        "which all each every some any really actually maybe probably "
        "definitely think know feel want need see hear say tell make take "
        "get go come give let still even much back well only way".split()
    )
    edge_punct = ".,!?;:\"'*_~`()[]{}"

    result = []
    for w in text.split(" "):
        bare = w.lower().strip(edge_punct)  # token without edge punctuation
        clean = bare.rstrip("s")  # singular-ish core
        is_candidate = (
            len(clean) >= 3
            # Letters only: rejects digits, paths, file names, and URLs.
            and clean.isalpha()
            and clean not in skip
            and bare not in skip
            # Tested on the stripped form so "running." is filtered too.
            and not bare.endswith(("ing", "ly", "ed"))
        )
        if is_candidate and random.random() < rate:
            result.append(_pluralize_word(w))
        else:
            result.append(w)
    return " ".join(result)
def _repeat_words(text: str, paragraph_rate: float) -> str:
    """Repeat one word 2-3 times in some lines (psychosis stutter effect).

    The text is processed line by line. Each non-blank line is selected with
    probability *paragraph_rate*; in a selected line with at least three
    space-separated words, one word is chosen among those that are not the
    first word and are at least three characters long once edge punctuation
    is stripped, and is written 2 or 3 times in a row. The last word of the
    line is eligible.

    'the walls are watching' -> 'the walls walls walls are watching'

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to process.
        paragraph_rate: Per-line selection probability; values <= 0 disable
            the transform.

    Returns:
        The text with at most one repeated word per line, or *text* itself
        when *paragraph_rate* <= 0.
    """
    if paragraph_rate <= 0:
        return text

    rebuilt = []
    for line in text.split("\n"):
        rebuilt.append(line)  # default: keep the line as is
        if not line.strip() or random.random() > paragraph_rate:
            continue
        tokens = line.split(" ")
        if len(tokens) < 3:
            continue
        # Candidate positions: everything after the first word, long enough.
        eligible = [
            pos
            for pos in range(1, len(tokens))
            if len(tokens[pos].strip(".,!?;:\"'")) >= 3
        ]
        if not eligible:
            continue
        pick = random.choice(eligible)
        times = random.randint(2, 3)
        tokens[pick] = " ".join([tokens[pick]] * times)
        rebuilt[-1] = " ".join(tokens)
    return "\n".join(rebuilt)
def _trailing_punctuation(text: str, rate: float) -> str:
    """Replace sentence-ending punctuation with short manic bursts.

    Every run of ``.``/``!``/``?`` followed by whitespace or end-of-text is
    replaced, with probability *rate*, by 2-5 copies of a single mark: ``!``
    three times out of four, ``?`` otherwise. The whitespace after the run
    is preserved.

    Consumes the module-level ``random`` generator.

    Args:
        text: The prose segment to process.
        rate: Per-sentence-end replacement probability; values <= 0 disable
            the transform.

    Returns:
        The text with some sentence-enders replaced by bursts, or *text*
        itself when *rate* <= 0.
    """
    if rate <= 0:
        return text

    def _burst(match: re.Match) -> str:
        """Return a burst for *match*, or the match unchanged, by chance.

        Group 2 of *match* is the trailing whitespace (empty at
        end-of-text) and is appended to the burst unchanged.
        """
        if random.random() <= rate:
            mark = random.choice(["!", "?", "!", "!"])
            length = random.randint(2, 5)
            return mark * length + match.group(2)
        return match.group(0)

    sentence_end = re.compile(r"([.!?]+)(\s|$)")
    return sentence_end.sub(_burst, text)
def _psychedelic_emoji_linebreaks(
    text: str,
    emojis: list | None = None,
) -> str:
    """Append an emoji sigil to every non-blank line that ends in a newline.

    The text is split on ``"\\n"``. Every line except the last has its
    trailing whitespace removed and gains a space plus a randomly chosen
    sigil; blank lines and the final line are left untouched. Text without
    any newline is returned unchanged.

    Consumes the module-level ``random`` generator (one pick per decorated
    line).

    Args:
        text: The text to decorate.
        emojis: Sigil pool to draw from; when empty or None the default
            ``_PSYCHEDELIC_EMOJIS`` pool is used.

    Returns:
        The decorated text.
    """
    pool = emojis if emojis else _PSYCHEDELIC_EMOJIS
    *body, tail = text.split("\n")
    if not body:
        return text
    decorated = [
        # Blank lines keep their exact content: no sigils on empty breaks.
        line.rstrip() + " " + random.choice(pool) if line.strip() else line
        for line in body
    ]
    decorated.append(tail)
    return "\n".join(decorated)
# =====================================================================
# Parse helpers
# =====================================================================
def _parse_percentage(value: Any) -> float:
    """Coerce a cadence rule value into a clamped 0.0-1.0 float.

    Numeric inputs (``int``/``float``) are clamped directly. Anything else
    is stringified and parsed:

    * a value with a trailing ``%`` is always read as a percentage, so
      ``"30%"`` -> 0.3, ``"1%"`` -> 0.01, and ``"0.5%"`` -> 0.005;
    * a bare number above 1.0 is also read as a percentage
      (``"30"`` -> 0.3);
    * a bare number up to 1.0 is read as a fraction (``"0.3"`` -> 0.3).

    The result is clamped to 0.0-1.0 and unparseable input yields 0.0.

    Args:
        value: A number, or a string such as ``"30%"`` or ``"0.3"``.

    Returns:
        The value as a fraction clamped to 0.0-1.0, or 0.0 when it cannot
        be parsed.
    """
    if isinstance(value, (int, float)):
        return min(1.0, max(0.0, float(value)))

    s = str(value).strip()
    # An explicit percent sign always means "divide by 100", even for values
    # of 1 or less; otherwise "1%" would be read as 100%.
    explicit_percent = s.endswith("%")
    s = s.rstrip("%").strip()
    try:
        v = float(s)
    except ValueError:
        return 0.0
    if explicit_percent or v > 1.0:
        v /= 100.0
    return min(1.0, max(0.0, v))
def _parse_coherence(value: Any) -> float:
    """Extract a leading percentage from a coherence rule as a fraction.

    The value is stringified and stripped. When it starts with a number
    directly followed by ``%`` (optionally separated by whitespace), such as
    ``"35%"``, ``"12.5%"``, or ``"35% -- thoughts dissolve"``, that number
    is divided by 100 and capped at 1.0. Anything without a leading
    percentage is treated as fully coherent (1.0).

    Args:
        value: A coherence descriptor, typically a string starting with a
            percentage.

    Returns:
        The leading percentage as a fraction in 0.0-1.0, or 1.0 when no
        leading percentage is present.
    """
    m = re.match(r"(\d+(?:\.\d+)?)\s*%", str(value).strip())
    if m is None:
        return 1.0
    # Cap at 1.0 so values above 100% cannot leave the documented range.
    return min(1.0, float(m.group(1)) / 100.0)
def _detect_caps_mode(value: Any) -> str:
"""Map a free-text capitalization description to a ``_mangle_caps`` mode.
Translates the prose ``capitalization`` field of a cadence profile into one
of the discrete modes ``_mangle_caps`` understands by keyword-matching the
lowercased text: phrases like "gone"/"lowercase" select ``lower``,
"random"/"sporadic" select ``random``, and "caps lock"/"shout"/"all caps"
select ``shout``; anything unrecognised falls back to ``none``.
Called by ``CadencePostProcessor.process`` while tallying per-profile caps
votes; a pure helper with no side effects.
Args:
value (Any): The capitalization descriptor from a cadence profile,
stringified before matching.
Returns:
str: One of ``lower``, ``random``, ``shout``, or ``none``.
"""
s = str(value).lower()
if "gone" in s or "none" in s or "no energy" in s or "lowercase" in s:
return "lower"
if "random" in s or "sporadic" in s or "occasional" in s:
return "random"
if "caps lock" in s or "shout" in s or "all caps" in s or "yelling" in s:
return "shout"
return "none"
# =====================================================================
# Main processor class
# =====================================================================
class CadencePostProcessor:
    """Pure code text seasoning for supraphysiological cadence states.

    No LLM calls. No API costs. No context corruption.
    Mechanical adjustments stay subtle so structure and markdown survive.
    """

    @staticmethod
    def process(
        text: str,
        cadence_profile: Dict[str, Any],
    ) -> str:
        """Apply cadence degradation to clean LLM output.

        The method (1) bails out for empty input or a force level below the
        gate, (2) splits off a leading status header so it is never altered,
        (3) blends per-profile rules into a set of transform rates weighted
        by each profile's share of the total activation score, (4) softens
        those rates with fixed global factors, and (5) runs the transform
        pipeline over every unprotected segment of the body.

        Parameters
        ----------
        text:
            Clean text from the main LLM.
        cadence_profile:
            Dict with keys: state, rules, voice_sample, force,
            intensity_desc. Optional keys read here: ``blend_profiles``
            (list of dicts with ``state``, ``rules``, ``activation_score``,
            ``emoji_sigils``), ``activation_score`` and ``emoji_sigils``.

        Returns
        -------
        Degraded text matching the cadence profile. ``text`` is returned
        unchanged when it or the profile is empty, or when ``force`` is not
        ``"MUST"`` / ``"should significantly"``.
        """
        if not text or not cadence_profile:
            return text

        # ``or {}`` also covers an explicit ``rules: None`` in the profile.
        rules = cadence_profile.get("rules") or {}
        force = cadence_profile.get("force", "should significantly")
        state = cadence_profile.get("state", "unknown")

        # 💀🔥 The limbic system gates cadence_refinement_profile emission
        # at score >= 0.65, so we only receive "MUST" or "should significantly"
        # here. If somehow "may subtly" or "should lightly" sneak through
        # (e.g. channel_heartbeat backward compat), skip them.
        if force not in ("MUST", "should significantly"):
            logger.debug(
                "Cadence refiner skipping force=%s state=%s (below drug-level gate)",
                force,
                state,
            )
            return text

        # -- Protect Star's status header from degradation --
        # Header pattern: [`model-name` :: emojis :: status :: `toolcall`]
        # Appears as the first line of the response. The whole match
        # (including trailing whitespace/newline) is kept verbatim.
        header = ""
        body = text
        header_match = re.match(
            r"(\[`[^`]+`\s*::.*?\])\s*\n?",
            text,
        )
        if header_match:
            header = header_match.group(0)
            body = text[header_match.end():]

        # -- Intensity multiplier based on force level --
        # Only 2 tiers reach here (gated by limbic system at score >= 0.65)
        intensity = 0.10 if force == "MUST" else 0.07

        # -- Blend rules from all matching profiles -------------------------
        # When multiple cadence profiles match (e.g. meth + xanax + PCP),
        # blend their degradation rates proportionally by activation score.
        blend_profiles = cadence_profile.get("blend_profiles") or []
        if not blend_profiles:
            # Fallback: single-profile mode (backwards compat)
            blend_profiles = [
                {
                    "state": state,
                    "rules": rules,
                    "activation_score": cadence_profile.get("activation_score", 1.0),
                    "emoji_sigils": cadence_profile.get("emoji_sigils", []),
                }
            ]

        # A blend entry without a score no longer raises KeyError: a missing
        # score counts as 1.0 (same default as the single-profile fallback)
        # and an explicit None/0 counts as zero weight.
        scores = [p.get("activation_score", 1.0) or 0.0 for p in blend_profiles]
        total_score = sum(scores) or 1.0  # avoid dividing by zero

        # -- Parse primary cadence rules (weighted blend) --
        typo_rate = 0.0
        coherence = 1.0
        caps_mode = "none"
        vowel_stretch = False

        # -- Blendable float rates --
        vowel_drop_rate = 0.0
        consonant_drop_rate = 0.0
        space_inject_rate = 0.0
        linebreak_rate = 0.0
        ellipsis_rate = 0.0
        noun_plural_rate = 0.0
        word_shuffle_rate = 0.0
        word_repeat_rate = 0.0
        word_salad_rate = 0.0
        trailing_punct_rate = 0.0
        mid_punct_rate = 0.0
        mega_vowel_stretch = False
        state_emoji_sigils: list[str] = []  # 💀🔥 per-state emoji sigils

        # -- Caps mode voting (highest cumulative weight wins) --
        caps_votes: Dict[str, float] = {}

        # Set when a k-hole profile is present; applied after the loop so
        # the outcome does not depend on the order of ``blend_profiles``.
        suppress_truncation = False

        psychedelic_states = ("acid", "shrooms", "dmt_breakthrough")

        for bp, score in zip(blend_profiles, scores):
            w = score / total_score
            bp_rules = bp.get("rules") or {}
            bp_state = bp.get("state", "")

            # Blend YAML-defined rules
            typo_rate += _parse_percentage(bp_rules.get("typo_rate", 0)) * w
            coherence_val = _parse_coherence(bp_rules.get("sentence_coherence", "100%"))
            coherence = min(coherence, coherence_val)  # most destructive wins

            bp_caps = _detect_caps_mode(bp_rules.get("capitalization", ""))
            if bp_caps != "none":
                caps_votes[bp_caps] = caps_votes.get(bp_caps, 0) + w

            if bp_rules.get("vowel_stretch", False):
                vowel_stretch = True

            # Blend punctuation hints
            punct = str(bp_rules.get("punctuation", "")).lower()
            if "ellips" in punct or "trailing" in punct or "fades" in punct:
                ellipsis_rate += 0.5 * intensity * w

            # -- State-specific degradation (weighted by profile score) --
            # NOTE(review): the ``max(rate, floor * w) * intensity`` lines
            # below rescale the *running* rate, so with several blended
            # profiles the result depends on their order. Kept numerically
            # as-is; confirm the intended formula before changing it.
            if bp_state == "barred_out":
                typo_rate = max(typo_rate, 0.20 * w) * intensity
                space_inject_rate += 0.06 * intensity * w
                consonant_drop_rate += 0.04 * intensity * w
                if bp_caps == "none":
                    caps_votes["lower"] = caps_votes.get("lower", 0) + w
            elif bp_state == "drunk":
                typo_rate = max(typo_rate, 0.15 * w) * intensity
                vowel_stretch = True
                space_inject_rate += 0.04 * intensity * w
                if bp_caps == "none":
                    caps_votes["random"] = caps_votes.get("random", 0) + w
            elif bp_state in ("tweaking", "coked_up"):
                typo_rate = max(typo_rate, 0.10 * w) * intensity
                if bp_caps == "none":
                    caps_votes["shout"] = caps_votes.get("shout", 0) + w
            elif bp_state == "nodding":
                vowel_drop_rate += 0.08 * intensity * w
                linebreak_rate += 0.02 * intensity * w
                ellipsis_rate = max(ellipsis_rate, 0.6 * intensity * w)
            elif bp_state == "k_hole":
                space_inject_rate += 0.08 * intensity * w
                consonant_drop_rate += 0.05 * intensity * w
                linebreak_rate += 0.03 * intensity * w
                mid_punct_rate += 0.35 * intensity * w
                mega_vowel_stretch = True
                word_shuffle_rate += 0.15 * intensity * w
                noun_plural_rate += 0.30 * intensity * w
                suppress_truncation = True  # NO truncation for k-hole
            elif bp_state == "dissociated_light":
                space_inject_rate += 0.04 * intensity * w
                consonant_drop_rate += 0.03 * intensity * w
                noun_plural_rate += 0.15 * intensity * w
            elif bp_state in ("psychosis_paranoid", "psychosis_manic"):
                typo_rate = max(typo_rate, 0.08 * w) * intensity
                word_shuffle_rate += 0.20 * intensity * w
                word_salad_rate += 0.25 * intensity * w
                word_repeat_rate += 0.36 * intensity * w
                trailing_punct_rate += 0.40 * intensity * w
                if bp_caps == "none":
                    caps_votes["random"] = caps_votes.get("random", 0) + w
            elif bp_state in psychedelic_states:
                vowel_stretch = True
                vowel_drop_rate += 0.03 * intensity * w
            elif bp_state == "rolling":
                vowel_stretch = True
                typo_rate = max(typo_rate, 0.05 * w) * intensity
            elif bp_state in ("stoned", "stoned_heavy"):
                vowel_stretch = True
                ellipsis_rate = max(ellipsis_rate, 0.3 * w) * intensity
                if bp_state == "stoned_heavy":
                    typo_rate = max(typo_rate, 0.08 * w) * intensity
                    # NOTE(review): assumed to be part of the stoned_heavy
                    # branch (mirrors stoned_indica) -- confirm.
                    space_inject_rate += 0.02 * intensity * w
            elif bp_state == "stoned_sativa":
                # 🌿 Sativa stoned: cerebral, tangential, creative
                vowel_stretch = True
                typo_rate = max(typo_rate, 0.03 * w) * intensity
                word_shuffle_rate += 0.05 * intensity * w  # tangential
                if bp_caps == "none":
                    caps_votes["random"] = caps_votes.get("random", 0) + w * 0.3
            elif bp_state == "stoned_indica":
                # 🌿 Indica stoned: heavy, slow, melting
                vowel_stretch = True
                ellipsis_rate = max(ellipsis_rate, 0.5 * w) * intensity
                linebreak_rate += 0.02 * intensity * w
                consonant_drop_rate += 0.03 * intensity * w
                typo_rate = max(typo_rate, 0.06 * w) * intensity
                space_inject_rate += 0.02 * intensity * w
                if bp_caps == "none":
                    caps_votes["lower"] = caps_votes.get("lower", 0) + w

            # 💀🔥 Emoji sigils: every state contributes the sigils from its
            # profile data; psychedelic states without any fall back to the
            # shared psychedelic set.
            yaml_sigils = bp.get("emoji_sigils", [])
            if yaml_sigils:
                state_emoji_sigils.extend(yaml_sigils)
            elif bp_state in psychedelic_states:
                state_emoji_sigils.extend(_PSYCHEDELIC_EMOJIS)

        if suppress_truncation:
            coherence = 1.0

        # -- Global softening (directive + LLM carry tone; code is texture) --
        _structural = 0.48
        _texture = 0.58
        _typo_scale = 0.70
        word_shuffle_rate *= _structural
        word_salad_rate *= _structural
        word_repeat_rate *= _structural
        trailing_punct_rate *= _structural
        mid_punct_rate *= _structural
        noun_plural_rate *= 0.70
        vowel_drop_rate *= 0.40
        consonant_drop_rate *= 0.40
        space_inject_rate *= _texture
        linebreak_rate *= _texture
        ellipsis_rate *= 0.75
        typo_rate = min(0.12, typo_rate * _typo_scale)  # hard cap at 12%

        # -- Resolve caps mode from votes (highest cumulative weight wins) --
        if caps_votes:
            caps_mode = max(caps_votes, key=caps_votes.get)

        # -- Protect code blocks, markdown, and CoT/internal-processing
        # XML-tag blocks -- and markdown links, inline code, URLs so lists and
        # formatting are not corrupted by character-level transforms.
        segments = _CADENCE_PROTECTED_SEGMENTS.split(body)
        processed = []
        for i, segment in enumerate(segments):
            # ``re.split`` with a capturing group yields prose at even
            # indices and protected matches at odd indices.
            if i % 2 == 1:
                processed.append(segment)
                continue

            s = segment
            # -- Apply transforms in order --
            # 1. Word-doubling stutter (structural coherence loss)
            s = _truncate_sentences(s, coherence)
            # 2. Word shuffling (k-hole sentence scrambling)
            s = _shuffle_words(s, word_shuffle_rate)
            # 3. Noun pluralization (k-hole/dissociated)
            s = _pluralize_nouns(s, noun_plural_rate)
            # 4. Word salad (psychosis compound mangling)
            s = _word_salad(s, word_salad_rate)
            # 5. Word repetition (psychosis stutter)
            s = _repeat_words(s, word_repeat_rate)
            # 6. Trailing ellipses
            s = _add_trailing_ellipses(s, ellipsis_rate)
            # 7. Trailing punctuation bursts (psychosis)
            s = _trailing_punctuation(s, trailing_punct_rate)
            # 8. Mid-paragraph punctuation (k-hole)
            s = _add_mid_punctuation(s, mid_punct_rate)
            # 9. Vowel/consonant drops
            s = _drop_vowels(s, vowel_drop_rate)
            s = _drop_consonants(s, consonant_drop_rate)
            # 10. Typo injection
            s = _apply_typos(s, typo_rate)
            # 11. Mid-word spaces
            s = _inject_spaces(s, space_inject_rate)
            # 12. Mid-word linebreaks
            s = _inject_linebreaks(s, linebreak_rate)
            # 13. Vowel stretching (casual)
            if vowel_stretch:
                s = _stretch_vowels(s, intensity * 0.38)
            # 14. Mega vowel stretching at sentence ends (k-hole)
            if mega_vowel_stretch:
                s = _stretch_sentence_final_vowels(s, intensity)
            # 15. State emoji sigils before linebreaks
            if state_emoji_sigils:
                s = _psychedelic_emoji_linebreaks(s, emojis=state_emoji_sigils)
            # 16. Capitalization last (so it applies to typo'd text)
            s = _mangle_caps(s, caps_mode)
            processed.append(s)

        result = "".join(processed)
        logger.info(
            "Cadence post-processing applied: state=%s, force=%s, "
            "typo=%.0f%%, coherence=%.0f%%, caps=%s",
            state,
            force,
            typo_rate * 100,
            coherence * 100,
            caps_mode,
        )
        return header + result