Source code for tools.universal_decoder

"""Universal Decoder tool.

Automatic detection and decoding of text encoded in various formats including
hex, binary, base64, base32, base58, base62, ASCII85, URL encoding,
HTML entities, ROT13, ROT47, and Morse code.
"""

from __future__ import annotations

import base64
import binascii
import codecs
import html
import json
import logging
import re
import string
from typing import Dict, List, Optional, Tuple, TYPE_CHECKING
from urllib.parse import unquote

if TYPE_CHECKING:
    from tool_context import ToolContext

logger = logging.getLogger(__name__)



[docs]
def decode_hexadecimal(text: str) -> Optional[str]:
    """Decode a hexadecimal byte string into text.

    Surrounding whitespace is trimmed, then every ``0x`` / backslash-x
    marker (case-insensitive) and every whitespace, ``:`` or ``-``
    separator is removed before the remaining digits are interpreted as
    bytes and decoded as UTF-8 (undecodable bytes become U+FFFD).

    Args:
        text (str): Candidate hexadecimal input.

    Returns:
        Optional[str]: The decoded text, or ``None`` when the input is
        empty, has an odd number of digits, contains non-hex characters,
        or any error occurs.
    """
    try:
        # Drop a leading prefix first, then every remaining marker/separator.
        digits = re.sub(r'^(0x|\\x)', '', text.strip(), flags=re.IGNORECASE)
        digits = re.sub(r'(\\x|0x|\s|:|-)', '', digits, flags=re.IGNORECASE)

        # Bytes need an even, non-zero number of hex digits.
        if len(digits) == 0 or len(digits) % 2:
            return None
        if re.match(r'^[0-9a-fA-F]+$', digits) is None:
            return None

        decoded = bytes.fromhex(digits).decode('utf-8', errors='replace')
    except Exception:
        return None
    return decoded or None




[docs]
def decode_binary(text: str) -> Optional[str]:
    """Decode a string of binary octets into text.

    Whitespace, commas, dots and hyphens are treated as separators and
    removed. The remaining ``0``/``1`` digits are grouped into 8-bit bytes
    and decoded as UTF-8, matching the other byte-oriented decoders in
    this module. Bytes that are not valid UTF-8 become U+FFFD instead of
    being mapped one-to-one onto Latin-1 code points, so multi-byte
    characters are reconstructed correctly.

    Args:
        text (str): Candidate binary input, e.g. ``"01001000 01101001"``.

    Returns:
        Optional[str]: The decoded text, or ``None`` when the input is not
        a string, is empty, contains characters other than ``0``/``1`` and
        separators, or its bit count is not a multiple of 8.
    """
    try:
        bits = re.sub(r'[\s,.-]+', '', text.strip())
    except (AttributeError, TypeError):
        # Non-string input cannot be binary text.
        return None

    if not bits or len(bits) % 8 != 0:
        return None
    if set(bits) - {'0', '1'}:
        return None

    # Base-2 parsing of the whole string, then a fixed-width big-endian
    # conversion, keeps leading zero octets intact.
    raw = int(bits, 2).to_bytes(len(bits) // 8, 'big')
    decoded = raw.decode('utf-8', errors='replace')
    return decoded or None




[docs]
def decode_base64(text: str) -> Optional[str]:
    """Decode standard or URL-safe Base64 into text.

    The input is normalised before decoding:

    * all whitespace (including line wrapping) is removed;
    * the URL-safe characters ``-`` and ``_`` are folded onto ``+`` and
      ``/`` so both alphabets take the same path;
    * existing trailing ``=`` padding is dropped and the correct amount is
      re-added, so unpadded input is accepted.

    Decoding is strict (``validate=True``). Without validation
    ``base64.b64decode`` silently discards characters outside the standard
    alphabet, which made URL-safe input decode to wrong bytes instead of
    reaching the URL-safe fallback.

    Args:
        text (str): Candidate Base64 input.

    Returns:
        Optional[str]: The decoded bytes interpreted as UTF-8 (invalid
        sequences become U+FFFD), or ``None`` when the input is empty, not
        a string, or not valid Base64.
    """
    try:
        compact = ''.join(text.split())
        compact = compact.replace('-', '+').replace('_', '/').rstrip('=')
        if not compact:
            return None
        # -len % 4 is the number of '=' needed to reach a multiple of 4.
        compact += '=' * (-len(compact) % 4)
        raw = base64.b64decode(compact, validate=True)
    except (AttributeError, TypeError, binascii.Error, ValueError):
        # Non-string input, non-ASCII text, bad characters or bad length.
        return None

    decoded = raw.decode('utf-8', errors='replace')
    return decoded or None




[docs]
def decode_base32(text: str) -> Optional[str]:
    """Decode Base32 into text.

    The input is trimmed and upper-cased, padded with ``=`` up to a
    multiple of eight characters, decoded with ``base64.b32decode`` and
    interpreted as UTF-8 (undecodable bytes become U+FFFD).

    Args:
        text (str): Candidate Base32 input; lower-case is accepted.

    Returns:
        Optional[str]: The decoded text, or ``None`` when decoding fails
        or yields an empty string.
    """
    try:
        symbols = text.strip().upper()
        remainder = len(symbols) % 8
        padded = symbols + '=' * (8 - remainder) if remainder else symbols
        decoded = base64.b32decode(padded).decode('utf-8', errors='replace')
    except Exception:
        return None
    return decoded or None



# Base58 symbol table: digits, upper-case, then lower-case letters, with the
# look-alike characters 0, O, I and l left out. A symbol's index is its value.
BASE58_ALPHABET = ''.join(
    ch
    for ch in string.digits + string.ascii_uppercase + string.ascii_lowercase
    if ch not in '0OIl'
)



[docs]
def decode_base58(text: str) -> Optional[str]:
    """Decode Base58 (alphabet given by ``BASE58_ALPHABET``) into text.

    The trimmed input is read as a base-58 number, converted to its
    big-endian byte representation, and prefixed with one zero byte per
    leading ``1`` symbol. The bytes are then decoded as UTF-8
    (undecodable bytes become U+FFFD).

    Args:
        text (str): Candidate Base58 input.

    Returns:
        Optional[str]: The decoded text, or ``None`` when a character is
        outside the alphabet, the result is empty, or any error occurs.
    """
    try:
        payload = text.strip()

        value = 0
        for symbol in payload:
            digit = BASE58_ALPHABET.find(symbol)
            if digit < 0:
                # Character outside the alphabet: not Base58.
                return None
            value = value * 58 + digit

        body = value.to_bytes((value.bit_length() + 7) // 8, 'big')
        # Each leading '1' stands for a zero byte the number itself cannot carry.
        zero_prefix = len(payload) - len(payload.lstrip('1'))
        decoded = (b'\x00' * zero_prefix + body).decode('utf-8', errors='replace')
    except Exception:
        return None
    return decoded or None



# Base62 symbol table: digits, then upper-case, then lower-case letters.
# A symbol's index in this string is its numeric value.
BASE62_ALPHABET = string.digits + string.ascii_uppercase + string.ascii_lowercase



[docs]
def decode_base62(text: str) -> Optional[str]:
    """Decode Base62 (alphabet given by ``BASE62_ALPHABET``) into text.

    The trimmed input is read as a base-62 number, converted to its
    minimal big-endian byte representation and decoded as UTF-8
    (undecodable bytes become U+FFFD). Leading zero-valued symbols do not
    produce bytes.

    Args:
        text (str): Candidate Base62 input.

    Returns:
        Optional[str]: The decoded text, or ``None`` when a character is
        outside the alphabet, the result is empty, or any error occurs.
    """
    try:
        payload = text.strip()

        value = 0
        for symbol in payload:
            digit = BASE62_ALPHABET.find(symbol)
            if digit < 0:
                # Character outside the alphabet: not Base62.
                return None
            value = value * 62 + digit

        body = value.to_bytes((value.bit_length() + 7) // 8, 'big')
        decoded = body.decode('utf-8', errors='replace')
    except Exception:
        return None
    return decoded or None




[docs]
def decode_ascii85(text: str) -> Optional[str]:
    """Decode ASCII85 into text.

    Surrounding whitespace is trimmed and, when the payload is wrapped in
    the ``<~`` ... ``~>`` framing, the delimiters are removed before the
    payload is passed to ``base64.a85decode``. The resulting bytes are
    decoded as UTF-8 (undecodable bytes become U+FFFD).

    Args:
        text (str): Candidate ASCII85 input, framed or bare.

    Returns:
        Optional[str]: The decoded text, or ``None`` when decoding fails
        or yields an empty string.
    """
    try:
        payload = text.strip()
        framed = payload.startswith('<~') and payload.endswith('~>')
        if framed:
            payload = payload[2:-2]
        raw = base64.a85decode(payload)
        decoded = raw.decode('utf-8', errors='replace')
    except Exception:
        return None
    return decoded or None




[docs]
def decode_url_encoding(text: str) -> Optional[str]:
    """Decode percent-encoded (URL-encoded) text.

    Args:
        text (str): Candidate percent-encoded input.

    Returns:
        Optional[str]: The result of ``urllib.parse.unquote`` when the
        input contains ``%`` and unquoting actually changes it; otherwise
        ``None`` (also on any error).
    """
    try:
        if '%' in text:
            plain = unquote(text)
            if plain != text:
                return plain
        # No percent sign, or nothing was actually escaped.
        return None
    except Exception:
        return None




[docs]
def decode_html_entities(text: str) -> Optional[str]:
    """Decode HTML character references (named and numeric).

    Args:
        text (str): Candidate text containing ``&name;`` or ``&#NNN;``
            style references.

    Returns:
        Optional[str]: The result of ``html.unescape`` when the input
        contains something shaped like an entity and unescaping actually
        changes it; otherwise ``None`` (also on any error).
    """
    try:
        if re.search(r'&[#\w]+;', text) is None:
            return None
        plain = html.unescape(text)
        if plain == text:
            # Entity-shaped token that is not a recognised reference.
            return None
        return plain
    except Exception:
        return None




[docs]
def decode_rot13(text: str) -> Optional[str]:
    """Apply the ROT13 substitution to ``text``.

    Uses the standard-library ``rot_13`` codec. For string input the
    result is always a string (possibly empty or unchanged).

    Args:
        text (str): Text to transform.

    Returns:
        Optional[str]: The transformed text, or ``None`` if the codec
        raises (for example on non-string input).
    """
    try:
        # A codec decoder returns (output, length_consumed); only the output matters.
        plain, _consumed = codecs.getdecoder('rot_13')(text)
    except Exception:
        return None
    return plain




[docs]
def decode_rot47(text: str) -> Optional[str]:
    """Apply the ROT47 substitution to ``text``.

    Characters with code points 33..126 are rotated by 47 positions within
    that 94-character range; every other character is left unchanged.

    Args:
        text (str): Text to transform.

    Returns:
        Optional[str]: The transformed text (possibly empty), or ``None``
        if an error occurs (for example on non-string input).
    """
    try:
        return ''.join(
            chr(33 + (ord(ch) - 33 + 47) % 94) if 33 <= ord(ch) <= 126 else ch
            for ch in text
        )
    except Exception:
        return None



# Lookup table from a Morse symbol (dots and dashes) to the character it
# encodes: letters A-Z, digits 0-9, then punctuation.
MORSE_TO_CHAR = {
    # Letters
    '.-': 'A', '-...': 'B', '-.-.': 'C', '-..': 'D',
    '.': 'E', '..-.': 'F', '--.': 'G', '....': 'H',
    '..': 'I', '.---': 'J', '-.-': 'K', '.-..': 'L',
    '--': 'M', '-.': 'N', '---': 'O', '.--.': 'P',
    '--.-': 'Q', '.-.': 'R', '...': 'S', '-': 'T',
    '..-': 'U', '...-': 'V', '.--': 'W', '-..-': 'X',
    '-.--': 'Y', '--..': 'Z',
    # Digits
    '-----': '0', '.----': '1', '..---': '2', '...--': '3', '....-': '4',
    '.....': '5', '-....': '6', '--...': '7', '---..': '8', '----.': '9',
    # Punctuation
    '.-.-.-': '.', '--..--': ',', '..--..': '?',
    '.----.': "'", '-.-.--': '!', '-..-.': '/',
    '-.--.': '(', '-.--.-': ')', '.-...': '&',
    '---...': ':', '-.-.-.': ';', '-...-': '=',
    '.-.-.': '+', '-....-': '-', '..--.-': '_',
    '.-..-.': '"', '...-..-': '$', '.--.-.': '@',
}



[docs]
def decode_morse(text: str) -> Optional[str]:
    """Decode Morse code into text using ``MORSE_TO_CHAR``.

    Words are separated by ``/``, ``|`` or a run of three or more
    whitespace characters; symbols within a word are separated by
    whitespace. A symbol that is not in the table is rendered as ``?``.

    Args:
        text (str): Candidate Morse input made of ``.`` and ``-``.

    Returns:
        Optional[str]: The decoded words joined by single spaces, or
        ``None`` when the input contains neither ``.`` nor ``-``, the
        result is blank, or any error occurs.
    """
    try:
        if re.search(r'[\.\-]', text) is None:
            return None

        # Normalise every word separator to the single token ' / '.
        normalized = re.sub(r'\s*/\s*|\s*\|\s*|\s{3,}', ' / ', text.strip())
        words = [
            ''.join(MORSE_TO_CHAR.get(symbol, '?') for symbol in chunk.split())
            for chunk in normalized.split(' / ')
        ]
        decoded = ' '.join(words)
        return decoded if decoded.strip() else None
    except Exception:
        return None




[docs]
def is_readable_text(text: str, threshold: float = 0.7) -> bool:
    """Report whether enough of ``text`` is printable.

    A character counts as readable when ``str.isprintable`` accepts it or
    it is a newline, carriage return or tab.

    Args:
        text (str): Text to inspect.
        threshold (float): Minimum fraction of readable characters
            (inclusive) required to return ``True``.

    Returns:
        bool: ``True`` when the readable fraction is at least
        ``threshold``; ``False`` for empty input or a lower fraction.
    """
    if not text:
        return False

    readable = 0
    for ch in text:
        if ch in '\n\r\t' or ch.isprintable():
            readable += 1
    return readable / len(text) >= threshold




[docs]
def score_decoding(decoded: str) -> float:
    """Rate how much a decoded string looks like readable text.

    The score is built from:

    * up to 40 points for the fraction of printable characters (newline,
      carriage return and tab count as printable);
    * up to 30 points for the fraction of alphabetic characters;
    * 5 points per distinct common English word found (space-delimited,
      case-insensitive), capped at 20;
    * minus 5 per U+FFFD replacement character or ``?``;
    * minus 10 per control character other than newline, carriage
      return and tab.

    Args:
        decoded (str): Decoded text to rate.

    Returns:
        float: The score clamped to the range 0.0-100.0; 0.0 for empty
        input.
    """
    if not decoded:
        return 0.0

    total = len(decoded)
    printable = letters = control = 0
    for ch in decoded:
        if ch.isprintable() or ch in '\n\r\t':
            printable += 1
        if ch.isalpha():
            letters += 1
        if ord(ch) < 32 and ch not in '\n\r\t':
            control += 1

    score = 0.0
    score += printable / total * 40
    score += letters / total * 30

    lowered = decoded.lower()
    hits = 0
    for word in (' the ', ' and ', ' is ', ' to ', ' a ', ' of ', ' in ', ' it ', ' you ', ' that '):
        if word in lowered:
            hits += 1
    score += min(hits * 5, 20)

    # '?' is penalised alongside U+FFFD.
    score -= (decoded.count('\ufffd') + decoded.count('?')) * 5
    score -= control * 10
    return max(0.0, min(100.0, score))




[docs]
def detect_encoding_type(text: str) -> List[str]:
    """Detect encoding type.

        Args:
            text (str): Text content.

        Returns:
            List[str]: The result.
        """
    candidates = []
    cleaned = text.strip()
    if '%' in cleaned and re.search(r'%[0-9A-Fa-f]{2}', cleaned):
        candidates.append('url')
    if re.search(r'&[#\w]+;', cleaned):
        candidates.append('html')
    if re.match(r'^[\.\-\s/|]+$', cleaned) and re.search(r'[\.\-]', cleaned):
        candidates.append('morse')
    if re.match(r'^[01\s]+$', cleaned):
        binary_cleaned = re.sub(r'\s+', '', cleaned)
        if len(binary_cleaned) >= 8 and len(binary_cleaned) % 8 == 0:
            candidates.append('binary')
    hex_cleaned = re.sub(r'(0x|\\x|\s|:|-)', '', cleaned, flags=re.IGNORECASE)
    if re.match(r'^[0-9A-Fa-f]+$', hex_cleaned) and len(hex_cleaned) % 2 == 0:
        candidates.append('hex')
    if cleaned.startswith('<~') and cleaned.endswith('~>'):
        candidates.append('ascii85')
    elif re.match(r'^[!-u]+$', cleaned):
        candidates.append('ascii85')
    base32_pattern = re.sub(r'=+$', '', cleaned.upper())
    if re.match(r'^[A-Z2-7]+$', base32_pattern):
        candidates.append('base32')
    if re.match(r'^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$', cleaned):
        candidates.append('base58')
    if re.match(r'^[0-9A-Za-z]+$', cleaned):
        candidates.append('base62')
    base64_cleaned = re.sub(r'=+$', '', cleaned)
    if re.match(r'^[A-Za-z0-9+/]+$', base64_cleaned) or re.match(r'^[A-Za-z0-9_-]+$', base64_cleaned):
        candidates.append('base64')
    if any(c.isalpha() for c in cleaned):
        candidates.append('rot13')
    if any(33 <= ord(c) <= 126 for c in cleaned):
        candidates.append('rot47')
    return candidates



# Registry of supported encodings: canonical key -> (display name, decoder).
# Every decoder takes the raw input string and returns the decoded text or
# None. `_universal_decode` looks hints up here and, for encodings that
# `detect_encoding_type` did not propose, iterates this dict in insertion
# order, so the order of entries can affect which of two equally scored
# results is reported first.
DECODERS = {
    'hex': ('Hexadecimal', decode_hexadecimal),
    'binary': ('Binary', decode_binary),
    'base64': ('Base64', decode_base64),
    'base32': ('Base32', decode_base32),
    'base58': ('Base58', decode_base58),
    'base62': ('Base62', decode_base62),
    'ascii85': ('ASCII85', decode_ascii85),
    'url': ('URL Encoding', decode_url_encoding),
    'html': ('HTML Entities', decode_html_entities),
    'rot13': ('ROT13', decode_rot13),
    'rot47': ('ROT47', decode_rot47),
    'morse': ('Morse Code', decode_morse),
}

# Alternative spellings accepted as an encoding hint, mapped to the canonical
# key used by the decoder registry. Keys are lower-case because hints are
# lower-cased before lookup.
ENCODING_ALIASES = {
    'hexadecimal': 'hex',
    'base-64': 'base64',
    'base-32': 'base32',
    'base-58': 'base58',
    'base-62': 'base62',
    'base85': 'ascii85',
    'percent': 'url',
    'percent-encoding': 'url',
    'url-encoding': 'url',
    'html-entities': 'html',
    'morse-code': 'morse',
}


async def _universal_decode(encoded_text: str, encoding_hint: str = None, ctx: ToolContext | None = None) -> str:
    """Decode ``encoded_text``, auto-detecting the encoding when needed.

    Flow:

    1. Blank input yields an error payload.
    2. With ``encoding_hint``: the hint is lower-cased, trimmed and
       resolved through ``ENCODING_ALIASES``. An unknown hint yields an
       error payload listing the valid keys. A known hint whose decoder
       succeeds returns immediately; if that decoder returns ``None`` the
       function falls through to auto-detection.
    3. Auto-detection: every encoding proposed by
       ``detect_encoding_type`` is tried and kept when its output is at
       least 50% readable. Every remaining decoder is then tried and kept
       only when it changes the input, is at least 50% readable and
       scores above 30.
    4. The highest-scoring result is reported; up to four runners-up
       scoring above 20 are listed as alternatives, with their text
       truncated to 200 characters.

    Args:
        encoded_text (str): Text to decode.
        encoding_hint (str): Optional encoding key or alias.
        ctx (ToolContext | None): Tool execution context; not used by
            this function.

    Returns:
        str: A JSON document. On success it has ``encoding_detected``,
        ``decoded_text``, ``confidence`` and ``alternatives`` (plus
        ``hint_used`` when the hint decided the result); on failure it has
        ``error`` with ``decoded_text`` and ``encoding_detected`` set to
        null.
    """
    if not (encoded_text and encoded_text.strip()):
        return json.dumps({"error": "No encoded text provided", "decoded_text": None, "encoding_detected": None})

    if encoding_hint:
        normalized_hint = encoding_hint.lower().strip()
        canonical = ENCODING_ALIASES.get(normalized_hint, normalized_hint)
        if canonical not in DECODERS:
            return json.dumps({
                "error": f"Unknown encoding hint: {encoding_hint}",
                "valid_hints": list(DECODERS.keys()),
                "decoded_text": None, "encoding_detected": None,
            })
        label, decode = DECODERS[canonical]
        hinted = decode(encoded_text)
        if hinted is not None:
            return json.dumps({
                "encoding_detected": label, "decoded_text": hinted,
                "confidence": round(score_decoding(hinted), 1), "hint_used": True, "alternatives": [],
            }, ensure_ascii=False)
        # The hinted decoder rejected the input: fall back to auto-detection.

    detected = detect_encoding_type(encoded_text)
    matches = []

    # Pass 1: encodings whose syntax matched the input.
    for key in detected:
        if key not in DECODERS:
            continue
        label, decode = DECODERS[key]
        try:
            plain = decode(encoded_text)
            if plain is not None and is_readable_text(plain, threshold=0.5):
                matches.append({"encoding": label, "decoded": plain, "score": score_decoding(plain)})
        except Exception:
            pass

    # Pass 2: everything else, held to a stricter bar.
    for key, (label, decode) in DECODERS.items():
        if key in detected:
            continue
        try:
            plain = decode(encoded_text)
            if plain is None or plain == encoded_text:
                continue
            if not is_readable_text(plain, threshold=0.5):
                continue
            rating = score_decoding(plain)
            if rating > 30:
                matches.append({"encoding": label, "decoded": plain, "score": rating})
        except Exception:
            pass

    if not matches:
        return json.dumps({
            "error": "Could not decode text with any known encoding",
            "decoded_text": None, "encoding_detected": None,
            "tried_encodings": [DECODERS[key][0] for key in detected if key in DECODERS],
        })

    # Stable sort: equally scored results keep their discovery order.
    ranked = sorted(matches, key=lambda match: match['score'], reverse=True)
    winner = ranked[0]

    alternatives = []
    for runner_up in ranked[1:5]:
        if runner_up['score'] > 20:
            preview = runner_up['decoded'][:200]
            if len(runner_up['decoded']) > 200:
                preview += '...'
            alternatives.append({
                "encoding": runner_up['encoding'],
                "decoded_text": preview,
                "confidence": round(runner_up['score'], 1),
            })

    return json.dumps({
        "encoding_detected": winner['encoding'], "decoded_text": winner['decoded'],
        "confidence": round(winner['score'], 1), "alternatives": alternatives,
    }, ensure_ascii=False)


# Tool registration table for this module: a single entry that exposes
# `_universal_decode` under the name "universal_decode", together with a
# JSON-Schema-style description of its parameters (`encoded_text` required,
# `encoding_hint` optional).
# NOTE(review): presumably read by the host application's tool loader; the
# consumer is not visible in this file, so confirm the expected schema there.
TOOLS = [
    {
        "name": "universal_decode",
        "description": (
            "Decode text from various encoding formats including hexadecimal, binary, "
            "base64, base32, base58, base62, ASCII85, URL encoding, HTML entities, "
            "ROT13, ROT47, and Morse code. Auto-detects the encoding type or accepts an optional hint."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "encoded_text": {
                    "type": "string",
                    "description": "The encoded text to decode.",
                },
                "encoding_hint": {
                    "type": "string",
                    "description": "Optional hint: hex, binary, base64, base32, base58, base62, ascii85, url, html, rot13, rot47, morse.",
                },
            },
            "required": ["encoded_text"],
        },
        "handler": _universal_decode,
    },
]