"""Message utility functions for Discord bot.
Includes markdown-aware message splitting that preserves formatting across
chunk boundaries, plus helper functions for mention filtering and XML escaping.
"""
import re
from typing import List
[docs]
def escape_xml(text: str) -> str:
    """Escape the five XML special characters in *text*.

    Replaces ``&``, ``<``, ``>``, ``"`` and ``'`` with the predefined XML
    entities so the result can be embedded in XML content or attribute
    values without being parsed as markup.

    Args:
        text: The raw string to escape.

    Returns:
        A copy of *text* with every special character replaced by its entity.
    """
    # The ampersand must be handled first; otherwise the "&" introduced by
    # the later replacements would itself be escaped a second time.
    return (
        text.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
        .replace("'", "&apos;")
    )
[docs]
def filter_backticks_from_mentions(text: str) -> str:
    """Strip backticks that wrap a Discord user mention.

    Any run of one or more backticks directly before and directly after a
    mention of the form ``<@ID>`` or ``<@!ID>`` is dropped, leaving the bare
    mention.  Non-string input is handed back untouched.
    """
    if isinstance(text, str):
        # Keep only the captured mention; the backtick runs on both sides
        # are part of the match and therefore discarded.
        return re.sub(r'`+(<@!?\d+>)`+', lambda found: found.group(1), text)
    return text
[docs]
def split_message(
    text: str,
    max_length: int = 1950,
    overflow_allowed: int = 45,
) -> List[str]:
    """Break *text* into markdown-safe chunks.

    The text is first cut at manual split points: the literal marker
    ``{{ SPLIT_HERE }}``, a ``---`` followed by a newline, and a backtick /
    newline(s) / backtick sequence.  Each resulting segment is trimmed, has
    ``'''`` rewritten to a code fence, and is then passed to
    ``_split_with_markdown_awareness`` with *max_length* as the target size
    and ``max_length + overflow_allowed`` as the hard limit.

    Returns a list of non-empty chunks.  Non-string input yields a single
    error message and an empty string yields ``[""]``.
    """
    if not isinstance(text, str):
        return ["Error: Invalid message content"]
    if not text:
        return [""]

    marker = "{{ SPLIT_HERE }}"
    ceiling = max_length + overflow_allowed

    # Normalise every alternative split trigger to the canonical marker,
    # in this fixed order.
    normalised = text
    for alias in ("---\n", '`\n\n`', '`\n`'):
        normalised = normalised.replace(alias, marker)

    result: List[str] = []
    for segment in normalised.split(marker):
        segment = segment.strip()
        if segment:
            fenced = segment.replace("'''", "```")
            pieces = _split_with_markdown_awareness(fenced, max_length, ceiling)
            result.extend(piece for piece in pieces if piece)
    return result
# ------------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------------
def _split_with_markdown_awareness(
text: str, target_length: int, hard_limit: int,
) -> List[str]:
"""Split *text* while preserving markdown formatting."""
chunks: List[str] = []
current_chunk = ""
in_code_block = False
code_block_lang = ""
active_formats: List[str] = []
lines = text.split('\n')
i = 0
while i < len(lines):
line = lines[i]
if "```" in line:
parts = line.split("```")
for j, part in enumerate(parts):
if j > 0:
if in_code_block:
current_chunk = current_chunk.rstrip('\n') + "\n```"
in_code_block = False
code_block_lang = ""
if len(current_chunk) > target_length:
chunks.append(current_chunk.strip())
current_chunk = ""
else:
in_code_block = True
stripped = part.strip()
code_block_lang = (
stripped.split()[0] if stripped else ""
)
if len(current_chunk) > 0:
current_chunk = _close_formats(
current_chunk, active_formats
)
chunks.append(current_chunk.strip())
current_chunk = ""
current_chunk = f"```{code_block_lang}\n"
active_formats.clear()
continue
if part or j == 0:
if in_code_block:
current_chunk += part
else:
if part.strip():
current_chunk, active_formats = (
_add_line_with_format_tracking(
current_chunk, part, active_formats,
target_length, hard_limit, chunks,
)
)
i += 1
continue
if in_code_block:
potential_chunk = current_chunk + line + "\n"
if len(potential_chunk) > target_length:
current_chunk = current_chunk.rstrip() + "\n```"
chunks.append(current_chunk)
current_chunk = f"```{code_block_lang}\n{line}\n"
else:
current_chunk = potential_chunk
else:
current_chunk, active_formats = _add_line_with_format_tracking(
current_chunk, line, active_formats,
target_length, hard_limit, chunks,
)
i += 1
if current_chunk.strip():
if in_code_block:
current_chunk += "\n```"
else:
current_chunk = _close_formats(current_chunk, active_formats)
chunks.append(current_chunk.strip())
return chunks
def _add_line_with_format_tracking(
    current_chunk: str,
    line: str,
    active_formats: List[str],
    target_length: int,
    hard_limit: int,
    chunks: List[str],
) -> tuple:
    """Append *line* to *current_chunk* word by word, tracking markdown formats.

    Words are separated on single spaces.  When adding a word would push the
    chunk past *target_length*, the chunk is closed with ``_close_formats``,
    appended to *chunks*, and a new chunk is started with ``_open_formats``.
    A word of at least *hard_limit* characters is emitted as raw slices of
    *hard_limit* characters each.

    Args:
        current_chunk: The chunk being built.
        line: One line of text (without its newline).
        active_formats: Markers currently open, as maintained by
            ``_update_formats_from_word``.
        target_length: Length beyond which a new chunk is started.
        hard_limit: Maximum length of an emitted chunk or word slice.
        chunks: Output list; finished chunks are appended to it in place.

    Returns:
        A ``(current_chunk, active_formats)`` tuple with the updated chunk
        (terminated by a newline) and the updated list of open markers.
    """
    words = line.split(' ')
    first_word = words[0]
    # A leading bullet or "N." is list syntax, not an emphasis marker.
    is_list_item = len(words) > 1 and (
        first_word in ('*', '-', '+')
        or re.match(r'^\d+\.$', first_word) is not None
    )
    # True while current_chunk contains nothing but re-opened markers.  No
    # space may follow them (the source text had none, and "* word" /
    # "** word" no longer reads as emphasis), and such a chunk is never
    # worth flushing.
    at_marker_prefix = False

    for idx, word in enumerate(words):
        if len(word) >= hard_limit:
            if current_chunk and not at_marker_prefix:
                closed = _close_formats(current_chunk, active_formats)
                chunks.append(closed.strip())
            chunks.extend(
                word[j:j + hard_limit]
                for j in range(0, len(word), hard_limit)
            )
            current_chunk = _open_formats("", active_formats)
            at_marker_prefix = bool(current_chunk)
            continue

        # Budget for the markers as they are actually rendered (an inline
        # code marker renders as backticks, not as its internal name),
        # counted once for opening and once for closing.
        format_overhead = 2 * len(_open_formats("", active_formats))
        needs_space = (
            bool(current_chunk)
            and not current_chunk.endswith('\n')
            and not at_marker_prefix
        )
        potential_length = (
            len(current_chunk) + int(needs_space) + len(word)
            + format_overhead
        )

        if (
            potential_length > target_length
            and current_chunk
            and not at_marker_prefix
        ):
            closed = _close_formats(current_chunk, active_formats)
            if len(closed) > hard_limit:
                # Last-resort truncation; drops whatever exceeds the limit.
                closed = closed[:hard_limit]
            chunks.append(closed.strip())
            current_chunk = _open_formats("", active_formats)
            at_marker_prefix = bool(current_chunk)
            needs_space = False

        if needs_space:
            current_chunk += " "
        current_chunk += word
        if word:
            at_marker_prefix = False

        if not (is_list_item and idx == 0):
            active_formats = _update_formats_from_word(word, active_formats)

    current_chunk += "\n"
    return current_chunk, active_formats
def _update_formats_from_word(
word: str, active_formats: List[str]
) -> List[str]:
"""Update active markdown formats based on markers found in *word*.
Handles ``**``, ``__``, ``*``, ``_``, ``~~``, ``||``,
and inline code
(including multi-backtick sequences). Single ``*`` and ``_`` are only
counted when they appear at word boundaries to avoid false positives on
identifiers like ``some_variable``.
"""
formats = active_formats.copy()
active_inline_code = None
for fmt in formats:
if fmt.startswith('INLINE_CODE_'):
active_inline_code = fmt
break
i = 0
while i < len(word):
if word[i] == '`':
backtick_count = 0
j = i
while j < len(word) and word[j] == '`':
backtick_count += 1
j += 1
code_marker = f'INLINE_CODE_{backtick_count}'
if active_inline_code:
if code_marker == active_inline_code:
formats.remove(active_inline_code)
active_inline_code = None
else:
formats.append(code_marker)
active_inline_code = code_marker
i = j
else:
i += 1
if active_inline_code:
return formats
word_without_code = re.sub(r'`+[^`]*`+', '', word)
if not word_without_code.strip():
return formats
for tag in ["**", "__", "~~", "||"]:
count = word_without_code.count(tag)
for _ in range(count):
if tag in formats:
formats.remove(tag)
else:
formats.append(tag)
temp_word = word_without_code.replace("**", "").replace("__", "")
for tag in ["*", "_"]:
positions = [i for i, c in enumerate(temp_word) if c == tag]
for pos in positions:
at_start = pos == 0
at_end = pos == len(temp_word) - 1
before_is_boundary = at_start or not temp_word[pos - 1].isalnum()
after_is_boundary = at_end or not temp_word[pos + 1].isalnum()
if before_is_boundary or after_is_boundary:
if tag in formats:
formats.remove(tag)
else:
formats.append(tag)
return formats
def _close_formats(chunk: str, active_formats: List[str]) -> str:
"""Close all active markdown formats in reverse order."""
for tag in reversed(active_formats):
if tag.startswith('INLINE_CODE_'):
n = int(tag.split('_')[2])
chunk += '`' * n
else:
chunk += tag
return chunk
def _open_formats(chunk: str, active_formats: List[str]) -> str:
"""Open all active markdown formats."""
for tag in active_formats:
if tag.startswith('INLINE_CODE_'):
n = int(tag.split('_')[2])
chunk += '`' * n
else:
chunk += tag
return chunk