# Source code for tools.query_arxiv

"""
Tool: query_arxiv
Search the arXiv database for scientific papers and return their abstracts, authors, and URLs.
"""
from __future__ import annotations

import asyncio
import json
import logging
import xml.etree.ElementTree as ET
from urllib.parse import quote

import httpx

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Tool metadata.
# NOTE(review): presumably read by a tool registry that is not visible in
# this file -- confirm before renaming any of these names.
TOOL_NAME = "query_arxiv"
TOOL_DESCRIPTION = (
    "Search the arXiv database for scientific papers "
    "and return their abstracts, authors, and URLs."
)

# JSON-Schema-style description of the arguments accepted by ``run``:
# ``query`` is mandatory, ``max_results`` is optional.
TOOL_PARAMETERS = dict(
    type="object",
    properties=dict(
        query=dict(
            type="string",
            description="The search query (e.g. 'quantum entanglement' or 'au:feynman').",
        ),
        max_results=dict(
            type="integer",
            description="Maximum number of results to return (default: 3, max 10).",
        ),
    ),
    required=["query"],
)


def _element_text(node) -> str:
    """Return *node*'s text with whitespace runs collapsed, or "" if absent.

    Covers both a missing element (``node is None``) and an element that is
    present but empty (e.g. ``<title/>``), whose ``text`` attribute is ``None``.
    """
    if node is None or node.text is None:
        return ""
    # split()/join flattens newlines and the indentation that follows a
    # wrapped line into single spaces, and trims both ends.
    return " ".join(node.text.split())


def _parse_arxiv_atom(xml_data: bytes) -> list[dict]:
    """Parse an Atom feed into a list of paper records.

    Args:
        xml_data: Raw Atom XML document.

    Returns:
        One dict per ``<entry>`` element, in document order, with the keys
        ``title``, ``authors`` (list of names), ``abstract`` and ``url``.
        A missing or blank title/summary/id is replaced by ``"No Title"``,
        ``"No Summary"`` or ``"No Link"``; authors without a name are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_data* is not well-formed XML.
    """
    # NOTE(review): the stdlib XML parser is not hardened against maliciously
    # crafted documents; this assumes the payload comes from a trusted source.
    root = ET.fromstring(xml_data)
    namespace = {"atom": "http://www.w3.org/2005/Atom"}

    results = []
    for entry in root.findall("atom:entry", namespace):
        author_names = (
            _element_text(author.find("atom:name", namespace))
            for author in entry.findall("atom:author", namespace)
        )
        results.append({
            "title": _element_text(entry.find("atom:title", namespace)) or "No Title",
            "authors": [name for name in author_names if name],
            "abstract": _element_text(entry.find("atom:summary", namespace)) or "No Summary",
            "url": _element_text(entry.find("atom:id", namespace)) or "No Link",
        })
    return results


# Field prefixes of the arXiv API ``search_query`` syntax.
_ARXIV_FIELD_PREFIXES = (
    "ti:", "au:", "abs:", "co:", "jr:", "cat:", "rn:", "id:", "all:",
)
# Upper bound on results per call, as advertised in the tool description.
_MAX_RESULTS_CAP = 10


def _build_arxiv_url(query: str, max_results: int) -> str:
    """Return the arXiv API URL for *query*, limited to *max_results* entries."""
    # Honour an explicit field prefix (e.g. "au:feynman") instead of nesting
    # it under "all:"; plain keyword queries still search all fields.
    if query.lstrip("(").startswith(_ARXIV_FIELD_PREFIXES):
        search_query = quote(query)
    else:
        search_query = f"all:{quote(query)}"
    return (
        "https://export.arxiv.org/api/query?"
        f"search_query={search_query}&start=0&max_results={max_results}"
    )


async def run(query: str, max_results: int = 3) -> str:
    """Search arXiv and return the matching papers as a JSON string.

    Args:
        query: Search terms, optionally using an arXiv field prefix such as
            ``au:`` or ``ti:``. Without a prefix, all fields are searched.
        max_results: Number of results to request; clamped to 1..10.

    Returns:
        A JSON document. On success it has ``"status": "success"`` and a
        ``"results"`` list (plus a ``"message"`` when the list is empty).
        On any failure it has ``"status": "error"`` and a ``"message"``.
        Errors are reported in the payload rather than raised.
    """
    query = query.strip() if isinstance(query, str) else ""
    if not query:
        return json.dumps({
            "status": "error",
            "message": "Failed to query arXiv: query must be a non-empty string.",
        })

    # Validate before any network work so a bad argument yields the same
    # JSON error shape as every other failure instead of an exception.
    try:
        limit = max(1, min(int(max_results), _MAX_RESULTS_CAP))
    except (TypeError, ValueError, OverflowError):
        return json.dumps({
            "status": "error",
            "message": f"Failed to query arXiv: invalid max_results {max_results!r}.",
        })

    url = _build_arxiv_url(query, limit)
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(
                url,
                headers={"User-Agent": "Stargazer-Agent/1.0"},
            )
            resp.raise_for_status()
            xml_data = resp.content
        # XML parsing is synchronous; run it in a worker thread so the
        # event loop is not blocked.
        results = await asyncio.to_thread(_parse_arxiv_atom, xml_data)
    except Exception as e:
        # Boundary of the tool: log the traceback and hand the caller a
        # JSON error rather than propagating.
        logger.exception("query_arxiv failed")
        return json.dumps({
            "status": "error",
            "message": f"Failed to query arXiv: {e!s}",
        })

    if not results:
        return json.dumps({
            "status": "success",
            "message": "No results found for the given query.",
            "results": [],
        })
    return json.dumps({"status": "success", "results": results})