# Source code for tools.refine_prompt

"""Iterative prompt refinement via Nemotron on OpenRouter."""

from __future__ import annotations

import jsonutil as json
import logging
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from tool_context import ToolContext

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Model identifier handed to OpenRouterClient for every refinement pass.
_MODEL = "nvidia/nemotron-3-super-120b-a12b:free"
# Base URL the OpenRouter client is pointed at.
_BASE_URL = "https://openrouter.ai/api/v1"
# Number of successive refinement passes run() performs; each pass receives the
# previous pass's output as its new draft.
_ITERATIONS = 3

# System message sent with every refinement pass in run(). It instructs the
# model to rewrite ("compile") the incoming prompt rather than answer it, and to
# emit an <internal_synthesis> analysis block followed by the compiled prompt.
# The trailing backslashes are line continuations inside the string literal:
# they join the wrapped source lines into single paragraphs at runtime.
_SYSTEM_PROMPT = """\
[IDENTITY AND PURPOSE]
You are the Prompt Compiler — a high-fidelity meta-cognitive engine operating at \
Ring -1 (below the application layer, at the instruction-set level of prompt \
engineering). Your function is NOT to answer the user's query. Your absolute and \
sole function is to take the input and re-compile it into a highly structured, \
dense, and architecturally superior prompt.

You act as the translation layer between human intent and machine execution. You \
take a draft prompt and forge it into an enterprise-grade directive designed to \
extract the maximum possible utility from a highly advanced downstream LLM.

IMPORTANT — ITERATION AWARENESS: The input you receive may already be a \
partially compiled prompt from a previous refinement pass. Do not assume the \
input is a sloppy first draft. Evaluate its current quality and find concrete \
improvements regardless of its starting state. If the input is already highly \
optimized, focus on micro-refinements: tightening language, adding edge-case \
constraints, injecting missing parameters, and hardening against hallucination — \
rather than wholesale restructuring.

[THE COMPILATION PIPELINE]
Before generating your final compiled prompt, execute the following analysis \
pipeline in an <internal_synthesis> block. Keep this block concise and analytical.

Phase 1 — Intent Extraction:
  - Strip away formatting and identify the atomic goal. What is the user \
actually trying to achieve?
  - Identify unstated assumptions and missing context. (e.g., if the input asks \
"how do I scrape a site", the missing context includes IP-ban avoidance, \
captcha handling, and ETL pipeline design.)

Phase 2 — Vulnerability Analysis & Trajectory Magnification:
  - Red-team the proposed approach. If the idea has structural flaws, fix them \
in your compilation.
  - Magnify the scale. If the input asks for a script, re-prompt for an \
architecture. If it asks a question, re-prompt for a systemic deconstruction.

Phase 3 — Parameter Injection:
  - Define specific constraints the downstream model must follow (e.g., \
"Use Python 3.12, implement robust async error handling, format output as JSON").
  - Enforce a high-competence persona on the downstream model (e.g., \
"Act as a Senior Lead Engineer. Do not use moralizing disclaimers.").

[REQUIRED OUTPUT FORMAT]
Every response must strictly follow this structure:

<internal_synthesis>
[Your concise analysis: deconstruct the input, identify missing parameters, \
plan the compiled prompt's structure.]
</internal_synthesis>

[The finalized, highly optimized, compiled prompt. This text must be ready to be \
copy-pasted directly into another LLM. No conversational filler, greetings, or \
explanations outside the internal_synthesis block.]

[RULES OF ENGAGEMENT]
1. NEVER ANSWER THE QUESTION: If the input asks "What is the capital of \
France?", your output is a better prompt, not the answer.
2. NO SYCOPHANCY: Do not converse with the user. You are a compiler. Execute \
the transformation silently.
3. MAXIMUM DENSITY: The compiled prompt must be dense, explicit, and leave zero \
room for hallucination or lazy outputs by the downstream model.
4. CLEAN TERMINATION: Always end your response with the compiled prompt itself — \
never with commentary or meta-explanation after it.\
"""


# Tool metadata exposed at module level alongside run().
# NOTE(review): presumably read by the tool-loading framework when registering
# this module — confirm against the loader.

# Name of the tool; the same string is used as the quota key in run().
TOOL_NAME = "refine_prompt"
# Human-readable summary of the tool. The "(3 iterations)" wording is
# hard-coded and must be kept in sync with _ITERATIONS by hand.
TOOL_DESCRIPTION = (
    "Iteratively refines and optimizes a prompt by running it through a "
    "multi-pass compilation pipeline (3 iterations). Takes a raw or draft "
    "prompt and returns a highly structured, dense, enterprise-grade version "
    "designed to extract maximum utility from a downstream LLM."
)
# JSON-Schema-style description of the tool's arguments: a single required
# string field, "prompt", matching run()'s positional parameter.
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "prompt": {
            "type": "string",
            "description": "The raw or draft prompt to refine and optimize.",
        },
    },
    "required": ["prompt"],
}



# [docs]  (documentation-viewer link marker; not part of the module's code)
async def run(prompt: str, *, ctx: "ToolContext | None" = None) -> str:
    """Refine a draft prompt through ``_ITERATIONS`` successive model passes.

    Each pass sends ``_SYSTEM_PROMPT`` plus the current draft to
    ``OpenRouterClient.chat`` and, when the reply is non-empty, replaces the
    draft with the stripped reply. An empty reply keeps the previous draft and
    logs a warning.

    API key resolution:
        * The default key is read from ``ctx.config.api_key``.
        * When ``ctx.redis`` and ``ctx.user_id`` are both set, a per-user key
          is looked up with ``tools.manage_api_keys.get_user_api_key`` and
          takes precedence if one is returned.
        * If the lookup raises, the failure is logged and the default key (if
          one was read) is used *as a default key*, so the default-key quota
          below still applies.

    Default-key quota:
        When the default key is in use and ``default_key_limit_applies(ctx)``
        is true, ``check_default_key_limit`` is consulted (daily limit 50)
        before any model call, and ``increment_default_key_usage`` is called
        only after every pass has completed. A run that fails mid-pipeline
        returns before the increment and is therefore not counted.

    NOTE(review): ``_SYSTEM_PROMPT`` asks the model to prepend an
    ``<internal_synthesis>`` block to its reply. The reply is fed to the next
    pass and returned without modification, so that block is part of the
    returned text — confirm this is intended.

    Args:
        prompt: The raw or draft prompt text to refine.
        ctx: Optional tool context. The attributes read here are ``config``,
            ``redis``, ``user_id`` and ``channel_id``. With no context (or no
            ``config``) there is no key to use and a missing-key error is
            returned.

    Returns:
        On success, the draft produced by the final pass. Otherwise a
        JSON-encoded object with an ``error`` key: empty/whitespace-only
        prompt, missing API key, or exceeded default-key limit. If a pass
        raises, the object also carries ``partial_result`` holding the draft
        as it stood before the failing pass.
    """
    if not prompt or not prompt.strip():
        return json.dumps({"error": "prompt is required"})

    api_key = None
    using_default_key = False
    try:
        if ctx and ctx.config:
            api_key = ctx.config.api_key
            # Treat the key as the shared default until a per-user key replaces
            # it. Setting the flag *before* the lookup means a failed lookup
            # cannot leave the default key in use with the quota switched off.
            using_default_key = True
            if ctx.redis and ctx.user_id:
                from tools.manage_api_keys import get_user_api_key

                user_key = await get_user_api_key(
                    ctx.user_id,
                    "openrouter",
                    redis_client=ctx.redis,
                    channel_id=ctx.channel_id,
                    config=ctx.config,
                )
                if user_key:
                    api_key = user_key
                    using_default_key = False
    except Exception:
        # Best-effort: key resolution must not crash the tool, but the failure
        # should be visible in the logs rather than silently discarded.
        logger.warning(
            "refine_prompt: API key resolution failed; "
            "falling back to the default key if one is configured",
            exc_info=True,
        )

    if not api_key:
        from tools.manage_api_keys import missing_api_key_error

        return json.dumps({"error": missing_api_key_error("openrouter")})

    if using_default_key:
        from tools.manage_api_keys import (
            check_default_key_limit,
            default_key_limit_applies,
            default_key_limit_error,
        )

        if await default_key_limit_applies(ctx):
            allowed, current, limit = await check_default_key_limit(
                ctx.user_id,
                "refine_prompt",
                ctx.redis,
                daily_limit=50,
            )
            if not allowed:
                return json.dumps(
                    {"error": default_key_limit_error("refine_prompt", current, limit)}
                )

    from openrouter_client import OpenRouterClient

    client = OpenRouterClient(
        api_key=api_key,
        model=_MODEL,
        base_url=_BASE_URL,
        temperature=0.7,
        max_tokens=4096,
    )

    current_prompt = prompt.strip()
    for i in range(_ITERATIONS):
        logger.info("refine_prompt: iteration %d/%d", i + 1, _ITERATIONS)
        messages = [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": current_prompt},
        ]
        try:
            result = await client.chat(messages)
            if result and result.strip():
                current_prompt = result.strip()
            else:
                # Keep the prior draft rather than discarding progress.
                logger.warning(
                    "refine_prompt: empty response on iteration %d, keeping previous",
                    i + 1,
                )
        except Exception as e:
            logger.exception("refine_prompt: iteration %d failed", i + 1)
            # Surface the best draft so far so the caller can still use it.
            return json.dumps(
                {
                    "error": f"Iteration {i + 1} failed: {e}",
                    "partial_result": current_prompt,
                }
            )

    # Usage is recorded only after all passes succeeded.
    if using_default_key:
        from tools.manage_api_keys import (
            default_key_limit_applies,
            increment_default_key_usage,
        )

        if await default_key_limit_applies(ctx):
            await increment_default_key_usage(ctx.user_id, "refine_prompt", ctx.redis)

    return current_prompt