# Source code for message_utils

"""Message utility functions for Discord bot.

Includes markdown-aware message splitting that preserves formatting across
chunk boundaries, plus helper functions for mention filtering and XML escaping.
"""

import re
from typing import List


def escape_xml(text: str) -> str:
    """Return *text* with the five XML special characters escaped.

    ``&``, ``<``, ``>``, ``"`` and ``'`` are replaced by their named
    entities so the result can be embedded in XML without being parsed
    as markup.
    """
    # "&" must be handled first; otherwise the ampersands introduced by
    # the later entities would themselves be escaped a second time.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&quot;"),
        ("'", "&apos;"),
    )
    escaped = text
    for raw, entity in substitutions:
        escaped = escaped.replace(raw, entity)
    return escaped
def filter_backticks_from_mentions(text: str) -> str:
    """Strip backticks that wrap Discord user mentions.

    A mention such as `<@82303438955753472>` (or the nickname form
    ``<@!id>``) wrapped in one or more backticks on each side is replaced
    by the bare mention, so triple-backtick wrapping is unwrapped too.
    Non-string input is returned unchanged.
    """
    if isinstance(text, str):
        # Group 1 is the mention itself; the backtick runs around it are dropped.
        return re.sub(r'`+(<@!?\d+>)`+', r'\1', text)
    return text
def split_message(
    text: str,
    max_length: int = 1950,
    overflow_allowed: int = 45,
) -> List[str]:
    """Break *text* into message-sized chunks without breaking markdown.

    The text is first cut at manual split points: the explicit
    ``{{ SPLIT_HERE }}`` marker, ``---\\n`` rules, and a closing backtick
    followed by one or two newlines and an opening backtick. Each piece is
    then handed to the markdown-aware splitter, which closes open formats
    (bold, italic, code blocks, ...) at the end of a chunk and re-opens
    them at the start of the next one.

    *max_length* is the target chunk size; *overflow_allowed* is added to
    it to form the hard limit passed to the splitter.

    Returns a list of non-empty chunks. Non-string input yields a single
    error message, and an empty string yields ``[""]``.
    """
    if not isinstance(text, str):
        return ["Error: Invalid message content"]
    if not text:
        return [""]

    hard_limit = max_length + overflow_allowed
    marker = "{{ SPLIT_HERE }}"

    # Normalise every implicit split trigger to the explicit marker.
    # Order matters: the two-newline form must be replaced before the
    # one-newline form.
    for trigger in ("---\n", '`\n\n`', '`\n`'):
        text = text.replace(trigger, marker)

    result: List[str] = []
    for segment in text.split(marker):
        segment = segment.strip()
        if not segment:
            continue
        # Triple single quotes are treated as a code fence.
        normalised = segment.replace("'''", "```")
        pieces = _split_with_markdown_awareness(
            normalised, max_length, hard_limit
        )
        result.extend([piece for piece in pieces if piece])

    return result
# ------------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------------

def _split_with_markdown_awareness(
    text: str,
    target_length: int,
    hard_limit: int,
) -> List[str]:
    """Split *text* while preserving markdown formatting.

    The text is processed line by line. Fenced code blocks are tracked so
    that a block split across chunks is closed with a fence at the end of
    one chunk and re-opened (with the same language tag) at the start of
    the next. An opening fence always starts a new chunk. Lines outside
    code blocks are added word by word via
    ``_add_line_with_format_tracking``.

    *target_length* is the size at which a chunk is flushed; *hard_limit*
    is forwarded to the word-level splitter.

    Returns the list of chunks (stripped of surrounding whitespace).
    """
    chunks: List[str] = []
    current_chunk = ""
    in_code_block = False
    code_block_lang = ""
    active_formats: List[str] = []

    for line in text.split('\n'):
        if "```" in line:
            # Every "```" on the line toggles the code-block state; the
            # pieces between fences are handled according to that state.
            for j, part in enumerate(line.split("```")):
                if j > 0:
                    if in_code_block:
                        # Closing fence.
                        current_chunk = current_chunk.rstrip('\n') + "\n```"
                        in_code_block = False
                        code_block_lang = ""
                        if len(current_chunk) > target_length:
                            chunks.append(current_chunk.strip())
                            current_chunk = ""
                    else:
                        # Opening fence: flush what came before and start
                        # a fresh chunk. Only the first word after the
                        # fence is kept (as the language tag).
                        in_code_block = True
                        stripped = part.strip()
                        code_block_lang = (
                            stripped.split()[0] if stripped else ""
                        )
                        if len(current_chunk) > 0:
                            current_chunk = _close_formats(
                                current_chunk, active_formats
                            )
                            chunks.append(current_chunk.strip())
                        current_chunk = f"```{code_block_lang}\n"
                        active_formats.clear()
                        continue

                if part or j == 0:
                    if in_code_block:
                        current_chunk += part
                    elif part.strip():
                        current_chunk, active_formats = (
                            _add_line_with_format_tracking(
                                current_chunk,
                                part,
                                active_formats,
                                target_length,
                                hard_limit,
                                chunks,
                            )
                        )
            continue

        if in_code_block:
            potential_chunk = current_chunk + line + "\n"
            if len(potential_chunk) > target_length:
                # Close the block here and continue it in the next chunk.
                current_chunk = current_chunk.rstrip() + "\n```"
                chunks.append(current_chunk)
                current_chunk = f"```{code_block_lang}\n{line}\n"
            else:
                current_chunk = potential_chunk
        else:
            current_chunk, active_formats = _add_line_with_format_tracking(
                current_chunk,
                line,
                active_formats,
                target_length,
                hard_limit,
                chunks,
            )

    if current_chunk.strip():
        if in_code_block:
            current_chunk += "\n```"
        else:
            current_chunk = _close_formats(current_chunk, active_formats)
        chunks.append(current_chunk.strip())

    return chunks


def _add_line_with_format_tracking(
    current_chunk: str,
    line: str,
    active_formats: List[str],
    target_length: int,
    hard_limit: int,
    chunks: List[str],
) -> tuple:
    """Add a line to *current_chunk* while tracking markdown formats.

    The line is appended word by word. When the next word would push the
    chunk past *target_length*, the active formats are closed, the chunk
    is appended to *chunks* (mutated in place) and a new chunk is started
    with the same formats re-opened. Words of *hard_limit* characters or
    more are emitted as raw slices of at most *hard_limit* characters.

    Returns a ``(current_chunk, active_formats)`` tuple: the chunk under
    construction (terminated with a newline) and the updated format list.
    """
    words = line.split(' ')

    # A leading bullet ("*", "-", "+") or "1." is list syntax, not an
    # emphasis marker, so it must not be fed to the format tracker.
    is_list_item = False
    if len(words) > 1:
        first_word = words[0]
        if first_word in ('*', '-', '+') or re.match(r'^\d+\.$', first_word):
            is_list_item = True

    # True while current_chunk holds nothing but re-opened format markers.
    # Such a chunk is treated as empty: no separating space is inserted
    # after the markers ("** word" / "* word" would not render as
    # emphasis) and it is never flushed on its own.
    only_markers = False

    for idx, word in enumerate(words):
        has_content = bool(current_chunk) and not only_markers

        if len(word) >= hard_limit:
            if has_content:
                chunks.append(
                    _close_formats(current_chunk, active_formats).strip()
                )
            chunks.extend(
                word[start:start + hard_limit]
                for start in range(0, len(word), hard_limit)
            )
            current_chunk = _open_formats("", active_formats)
            only_markers = True
            continue

        # Reserve room for the markers needed to close the formats here
        # and re-open them in the next chunk, measured by rendered length
        # (an INLINE_CODE_n tag renders as n backticks).
        format_overhead = 2 * sum(
            len(_format_marker(tag)) for tag in active_formats
        )
        needs_space = has_content and not current_chunk.endswith('\n')
        potential_length = (
            len(current_chunk) + int(needs_space) + len(word)
            + format_overhead
        )

        if potential_length > target_length and has_content:
            closed = _close_formats(current_chunk, active_formats)
            if len(closed) > hard_limit:
                closed = closed[:hard_limit]
            chunks.append(closed.strip())
            current_chunk = _open_formats("", active_formats)
            only_markers = True
            needs_space = False

        if needs_space:
            current_chunk += " "
        current_chunk += word
        if word:
            only_markers = False

        if not (is_list_item and idx == 0):
            active_formats = _update_formats_from_word(word, active_formats)

    current_chunk += "\n"
    return current_chunk, active_formats


def _update_formats_from_word(
    word: str, active_formats: List[str]
) -> List[str]:
    """Update active markdown formats based on markers found in *word*.

    Handles ``**``, ``__``, ``*``, ``_``, ``~~``, ``||``, and inline code
    (including multi-backtick sequences, tracked as ``INLINE_CODE_<n>``).

    Single ``*`` and ``_`` are only counted when they appear at word
    boundaries to avoid false positives on identifiers like
    ``some_variable``.

    Returns a new list; *active_formats* itself is not modified.
    """
    formats = active_formats.copy()

    def toggle(tag: str) -> None:
        # A marker closes the format if it is open, otherwise opens it.
        if tag in formats:
            formats.remove(tag)
        else:
            formats.append(tag)

    active_inline_code = next(
        (fmt for fmt in formats if fmt.startswith('INLINE_CODE_')), None
    )

    # Each maximal run of backticks either opens inline code or, when its
    # length matches the open run, closes it. Other runs inside an open
    # span are literal content.
    for run in re.finditer(r'`+', word):
        code_marker = f'INLINE_CODE_{len(run.group())}'
        if active_inline_code:
            if code_marker == active_inline_code:
                formats.remove(active_inline_code)
                active_inline_code = None
        else:
            formats.append(code_marker)
            active_inline_code = code_marker

    # Still inside inline code: other markers in the word are ignored.
    if active_inline_code:
        return formats

    word_without_code = re.sub(r'`+[^`]*`+', '', word)
    if not word_without_code.strip():
        return formats

    for tag in ["**", "__", "~~", "||"]:
        for _ in range(word_without_code.count(tag)):
            toggle(tag)

    # Remove the double markers so their characters are not re-counted
    # as single "*" / "_".
    temp_word = word_without_code.replace("**", "").replace("__", "")
    last = len(temp_word) - 1
    for tag in ["*", "_"]:
        positions = [pos for pos, ch in enumerate(temp_word) if ch == tag]
        for pos in positions:
            before_is_boundary = pos == 0 or not temp_word[pos - 1].isalnum()
            after_is_boundary = (
                pos == last or not temp_word[pos + 1].isalnum()
            )
            if before_is_boundary or after_is_boundary:
                toggle(tag)

    return formats


def _format_marker(tag: str) -> str:
    """Return the literal markdown text for a tracked format *tag*."""
    if tag.startswith('INLINE_CODE_'):
        # INLINE_CODE_<n> stands for a run of n backticks.
        return '`' * int(tag.split('_')[2])
    return tag


def _close_formats(chunk: str, active_formats: List[str]) -> str:
    """Close all active markdown formats in reverse order."""
    return chunk + ''.join(
        _format_marker(tag) for tag in reversed(active_formats)
    )


def _open_formats(chunk: str, active_formats: List[str]) -> str:
    """Open all active markdown formats."""
    return chunk + ''.join(_format_marker(tag) for tag in active_formats)