"""
Tool: query_arxiv
Search the arXiv database for scientific papers and return their abstracts, authors, and URLs.
"""
from __future__ import annotations
import asyncio
import json
import logging
import xml.etree.ElementTree as ET
from urllib.parse import quote
import httpx
logger = logging.getLogger(__name__)

# Tool registration metadata — presumably consumed by the agent's tool-loading
# framework (name + human description + JSON-Schema argument spec); confirm
# against the loader before renaming any of these module-level constants.
TOOL_NAME = "query_arxiv"
TOOL_DESCRIPTION = "Search the arXiv database for scientific papers and return their abstracts, authors, and URLs."
# JSON-Schema object describing the arguments accepted by run().
# NOTE: the "max 10" in the description is enforced in run() via min(..., 10).
TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "query": {
            "type": "string",
            "description": "The search query (e.g. 'quantum entanglement' or 'au:feynman')."
        },
        "max_results": {
            "type": "integer",
            "description": "Maximum number of results to return (default: 3, max 10)."
        }
    },
    "required": ["query"]
}
def _parse_arxiv_atom(xml_data: bytes) -> list[dict]:
root = ET.fromstring(xml_data)
namespace = {"atom": "http://www.w3.org/2005/Atom"}
results = []
for entry in root.findall("atom:entry", namespace):
title_node = entry.find("atom:title", namespace)
summary_node = entry.find("atom:summary", namespace)
link_node = entry.find("atom:id", namespace)
title = (
title_node.text.strip().replace("\n", " ")
if title_node is not None else "No Title"
)
summary = (
summary_node.text.strip().replace("\n", " ")
if summary_node is not None else "No Summary"
)
link = link_node.text.strip() if link_node is not None else "No Link"
authors = []
for author in entry.findall("atom:author", namespace):
name_node = author.find("atom:name", namespace)
if name_node is not None:
authors.append(name_node.text.strip())
results.append({
"title": title,
"authors": authors,
"abstract": summary,
"url": link,
})
return results
async def run(query: str, max_results: int = 3) -> str:
    """Query the arXiv API and return matching papers as a JSON string.

    Args:
        query: arXiv search expression (e.g. 'quantum entanglement' or
            'au:feynman'); it is URL-quoted before being sent.
        max_results: Number of results requested; clamped to the range 1..10.

    Returns:
        A JSON string: ``{"status": "success", "results": [...]}`` on
        success (plus a ``message`` when nothing matched), or
        ``{"status": "error", "message": ...}`` on any failure.
    """
    # Clamp both ends: the schema promises at most 10, and a zero/negative
    # value would make the API call pointless (original code only capped).
    max_results = max(1, min(max_results, 10))
    # Use HTTPS — the arXiv export API serves the same endpoint over TLS.
    url = (
        "https://export.arxiv.org/api/query?"
        f"search_query=all:{quote(query)}&start=0&max_results={max_results}"
    )
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(
                url,
                headers={"User-Agent": "Stargazer-Agent/1.0"},
            )
            resp.raise_for_status()
            xml_data = await resp.aread()
        # XML parsing is CPU-bound; keep it off the event loop.
        results = await asyncio.to_thread(_parse_arxiv_atom, xml_data)
        if not results:
            return json.dumps({
                "status": "success",
                "message": "No results found for the given query.",
                "results": [],
            })
        return json.dumps({"status": "success", "results": results})
    except Exception as e:
        # Broad catch is deliberate: this is a tool boundary and must always
        # return a JSON payload to the agent rather than raise.
        logger.exception("query_arxiv failed")
        return json.dumps({
            "status": "error",
            "message": f"Failed to query arXiv: {e!s}",
        })