# Source code for search_query_generator

"""Search Query Generator.

Generates search queries from user prompts using inception/mercury-2 via the
OpenRouter API.  Used by :class:`~web_search_context.WebSearchContextManager`
for automatic web-search context injection.
"""

from __future__ import annotations

import logging
import os
import re
from datetime import datetime
from typing import List

import httpx
import jsonutil as json

logger = logging.getLogger(__name__)

# OpenRouter chat-completions endpoint that every request is POSTed to.
_CHAT_URL = "https://openrouter.ai/api/v1/chat/completions"

# SECURITY: the API key is read from the environment instead of being embedded
# in the source. A key that has ever been committed must be treated as leaked
# and rotated in the OpenRouter dashboard. When OPENROUTER_API_KEY is unset the
# value is an empty string, so requests are rejected by the API and
# generate_search_queries() degrades to returning no queries.
_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model slug sent in the "model" field of every query-generation request.
QUERY_GENERATOR_MODEL = "inception/mercury-2"


def _system_prompt() -> str:
    """Build the query-generator system prompt with the current date baked in.

    Assembles the instruction block that tells the model when to emit
    web-search queries versus an empty array. Today's date and the current
    year are interpolated so the model anchors "latest"/"current" questions to
    the right timeframe and stamps generated queries with the present year.

    The clock is read exactly once (local time, via
    :func:`datetime.datetime.now`) so the ``CURRENT DATE`` line and the
    ``Use <year>`` line can never disagree, even if the call straddles a year
    boundary. Pure string construction with no I/O. Called by
    :func:`generate_search_queries` to populate the ``system`` message on every
    request (``ncm_variant_cache.py`` is noted to define its own same-named
    helper).

    Returns:
        The fully-rendered system-prompt string for the query generator.
    """
    # Single clock sample: both formatted values derive from the same instant.
    now = datetime.now()
    current_date = now.strftime("%B %d, %Y")
    current_year = now.strftime("%Y")
    return (
        "You are a search query generator. Your job: decide if the user "
        "needs web search.\n\n"
        f"CURRENT DATE: {current_date}\n\n"
        'DEFAULT: Return empty array {"queries":[]}\n\n'
        "ONLY generate queries if ALL of these are true:\n"
        "1. The user asks a QUESTION (not a statement)\n"
        "2. The question needs CURRENT/LATEST information from the web\n"
        "3. The topic is EXPLICITLY mentioned in the user's message\n\n"
        "Examples that NEED search:\n"
        '- "What is the latest version of Node.js?" -> generate queries\n'
        '- "What happened in tech news today?" -> generate queries\n'
        '- "Current weather in Tokyo?" -> generate queries\n\n'
        "Examples that DON'T need search (return empty):\n"
        '- Statements: "The weather is nice", "Software has bugs"\n'
        '- Roleplay: "*smiles*", "pretend you are a doctor"\n'
        '- Opinions: "I think cats are better", "dogs are cute"\n'
        '- Greetings: "hello", "how are you"\n'
        '- Creative: "write a poem", "tell me a story"\n'
        '- About AI: "what do you think?", "how do you feel?"\n\n'
        "CRITICAL RULE: If the user does NOT mention a specific topic, "
        "do NOT invent queries about random topics. "
        "The queries MUST match what the user actually asked about.\n\n"
        f"Use {current_year} when generating queries about current events "
        "or versions.\n"
        "Maximum 3 queries.\n\n"
        "OUTPUT FORMAT (no other text):\n"
        '{"queries":["search term 1", "search term 2"]}\n\n'
        "If no search needed:\n"
        '{"queries":[]}'
    )



# [docs]
async def generate_search_queries(
    prompt: str,
    max_queries: int = 3,
) -> List[str]:
    """Generate search queries from a user prompt via OpenRouter.

    Sends the prompt, together with the system prompt from
    :func:`_system_prompt`, to the chat-completions endpoint and hands the
    assistant's reply to :func:`_parse_queries_response`. The function is
    best-effort: any failure (network error, non-2xx status, malformed
    response) is logged with a traceback and results in an empty list rather
    than an exception.

    Parameters
    ----------
    prompt:
        The user's message text. Empty or whitespace-only prompts return an
        empty list without contacting the API.
    max_queries:
        Cap on the number of queries returned. A value of zero or less returns
        an empty list immediately, without contacting the API.

    Returns
    -------
    list[str]
        Search query strings, or an empty list when no search is needed or the
        request failed.
    """
    # Nothing to search for, or the caller asked for no queries: return before
    # spending a network round-trip (and API credits) on a guaranteed-empty
    # result. This also keeps a negative cap from reaching the slicing logic.
    if not prompt or not prompt.strip() or max_queries <= 0:
        return []

    model = QUERY_GENERATOR_MODEL
    try:
        logger.info(
            "Generating search queries with %s for: %.150s…",
            model,
            prompt,
        )
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": _system_prompt()},
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.3,
            "max_tokens": 300,
            "reasoning": {"effort": "none"},
        }
        headers = {
            "Authorization": f"Bearer {_API_KEY}",
            "Content-Type": "application/json",
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                _CHAT_URL,
                json=payload,
                headers=headers,
            )
            resp.raise_for_status()

        data = resp.json()

        # Walk choices[0].message.content defensively: an empty ``choices``
        # list, a null ``message``, or non-string ``content`` is a normal
        # "no answer" outcome and should not surface as an error traceback.
        choices = data.get("choices") if isinstance(data, dict) else None
        first_choice = choices[0] if isinstance(choices, list) and choices else None
        message = (
            first_choice.get("message") if isinstance(first_choice, dict) else None
        )
        content = message.get("content") if isinstance(message, dict) else None
        text = content if isinstance(content, str) else ""
        logger.info("Query generator raw response (%s): %.500s", model, text)

        if not text:
            return []

        queries = _parse_queries_response(text, max_queries)
        if queries:
            logger.info(
                "Generated %d search queries via %s: %s", len(queries), model, queries
            )
        else:
            logger.info("No search queries from %s (raw: %.200s)", model, text)
        return queries

    except Exception as exc:
        # Deliberate catch-all boundary: query generation is optional context
        # enrichment, so a failure here must never break the caller.
        logger.error(
            "Search query generation failed (%s): %s", model, exc, exc_info=True
        )
        return []



# ------------------------------------------------------------------
# Response parsing (multi-strategy)
# ------------------------------------------------------------------


# Patterns used by _parse_queries_response, compiled once at import time.
_THINK_BLOCK_RE = re.compile(r"<think(?:ing)?>\s*.*?</think(?:ing)?>", re.DOTALL)
_CODE_FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)```", re.DOTALL)
_QUERIES_KEY_RE = re.compile(r'"queries"\s*:\s*\[(.*?)\]', re.DOTALL)
_BARE_ARRAY_RE = re.compile(r'\[([^\[\]]*"[^"]+(?:"[,\s]*"[^"]+)*"[^\[\]]*)\]')
_QUOTED_STRING_RE = re.compile(r'"([^"]+)"')

# Strings that are JSON scaffolding rather than real search terms.
_FILLER_TOKENS = frozenset({"queries", "query", "true", "false", "null"})


def _parse_queries_response(text: str, max_queries: int = 3) -> List[str]:
    """Extract the search-query list from a possibly messy LLM response.

    Tolerates the model wrapping its answer in reasoning tags, code fences, or
    stray prose by trying three strategies in order:

    1. Parse a complete JSON object carrying a ``queries`` key. If that object
       parses and its ``queries`` value is a literally empty list, the answer
       is taken as an explicit "no search needed" and an empty list is
       returned without consulting the heuristics below (which could otherwise
       mistake an array in surrounding prose for search queries).
    2. Regex out a ``"queries": [...]`` array from otherwise invalid JSON.
    3. Fall back to any standalone JSON string array, filtering out filler
       tokens such as ``query`` or ``true`` and very short strings.

    Every returned string is whitespace-stripped and the result is capped at
    ``max_queries``. Pure string and JSON parsing with no I/O; a parse miss is
    logged at debug level through the module ``logger``. Called by
    :func:`generate_search_queries` to interpret the model output.

    Args:
        text: The raw assistant message content from the query-generator model.
        max_queries: Maximum number of queries to return; defaults to 3. A
            value of zero or less always yields an empty list.

    Returns:
        The extracted query strings (at most ``max_queries``), or an empty list
        when nothing parseable was found.
    """
    # A non-positive cap can only produce an empty result; bailing out here
    # also stops a negative value from slicing entries off the END of a list.
    if not text or max_queries <= 0:
        return []

    # Drop any <think>/<thinking> reasoning block before looking for JSON.
    text = _THINK_BLOCK_RE.sub("", text.strip()).strip()

    # Strategy 1: find a complete JSON object with a "queries" key.
    candidate = text
    fence = _CODE_FENCE_RE.search(candidate)
    if fence:
        candidate = fence.group(1).strip()
    start = candidate.find("{")
    end = candidate.rfind("}")
    if start != -1 and end > start:
        try:
            obj = json.loads(candidate[start : end + 1])
        except ValueError:
            # JSON decode errors are ValueError subclasses; fall through to
            # the regex strategies for malformed JSON.
            obj = None
        queries = obj.get("queries") if isinstance(obj, dict) else None
        if isinstance(queries, list):
            valid = [q.strip() for q in queries if isinstance(q, str) and q.strip()]
            # A literally empty list is an authoritative "no search" answer.
            # A non-empty list with no usable strings still falls through.
            if valid or not queries:
                return valid[:max_queries]

    # Strategy 2: regex for "queries": [...] pattern.
    keyed = _QUERIES_KEY_RE.search(text)
    if keyed:
        found = [s.strip() for s in _QUOTED_STRING_RE.findall(keyed.group(1))]
        valid = [s for s in found if s and not s.lower().startswith("query ")]
        if valid:
            return valid[:max_queries]

    # Strategy 3: standalone JSON array anywhere in the response.
    bare = _BARE_ARRAY_RE.search(text)
    if bare:
        found = [s.strip() for s in _QUOTED_STRING_RE.findall(bare.group(1))]
        valid = [
            s
            for s in found
            if len(s) > 5
            and s.lower() not in _FILLER_TOKENS
            and not s.lower().startswith("query ")
        ]
        if valid:
            return valid[:max_queries]

    logger.debug("Could not extract queries from: %.300s", text)
    return []