"""Advanced PDF Generator Tool.
Creates complex PDFs from JSON specifications with text, images, tables,
headers, links, and various styling options across multiple pages.
"""
from __future__ import annotations
import asyncio
import io
import json
import logging
import os
import re
import tempfile
import uuid
from typing import Dict, List, Optional, Any, Union, Tuple, TYPE_CHECKING
if TYPE_CHECKING:
from tool_context import ToolContext
try:
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.pdfgen import canvas
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image, PageBreak
from reportlab.platypus.flowables import KeepTogether, Flowable
import httpx
from PIL import Image as PILImage
except ImportError as e:
logging.error(f"Required dependencies not installed: {e}")
colors = None
canvas = None
SimpleDocTemplate = None
Flowable = object
logger = logging.getLogger(__name__)
class LinkFlowable(Flowable):
    """Flowable that draws a paragraph of text and makes its area a URL link.

    Attributes:
        text: The text handed to the underlying ``Paragraph``.
        url: The URL attached to the drawn area.
        style: Paragraph style used for the text; defaults to the sample
            stylesheet's ``Normal`` style when none is supplied.
        width: Width returned by the most recent ``wrap`` call (0 before).
        height: Height returned by the most recent ``wrap`` call (0 before).
        paragraph: The ``Paragraph`` instance that renders ``text``.
    """

    def __init__(self, text, url, style=None):
        """Initialize the link flowable.

        Args:
            text: Text content to display.
            url: URL the drawn area should link to.
            style: Optional paragraph style; falls back to ``Normal``.
        """
        Flowable.__init__(self)
        self.text = text
        self.url = url
        self.style = style or getSampleStyleSheet()['Normal']
        self.width = 0
        self.height = 0
        self.paragraph = Paragraph(self.text, self.style)

    def wrap(self, availWidth, availHeight):
        """Size the flowable by delegating to the wrapped paragraph.

        Args:
            availWidth: Width offered by the layout engine.
            availHeight: Height offered by the layout engine.

        Returns:
            The ``(width, height)`` pair reported by the paragraph, which is
            also stored on ``self.width`` / ``self.height``.
        """
        self.width, self.height = self.paragraph.wrap(availWidth, availHeight)
        return self.width, self.height

    def draw(self):
        """Draw the paragraph and register the link rectangle over it."""
        self.paragraph.drawOn(self.canv, 0, 0)
        # The rectangle is expressed in this flowable's own coordinate
        # system, so it must be resolved relative to the current canvas
        # transform. With relative=0 it would be taken as absolute page
        # coordinates and the hotspot would sit at the page origin instead
        # of over the text.
        self.canv.linkURL(self.url, (0, 0, self.width, self.height), relative=1)
class PDFGenerator:
    """Build PDF documents from a dictionary specification using ReportLab.

    Attributes:
        page_size: ReportLab page-size tuple (``A4`` or ``letter``).
        width: Page width taken from ``page_size``.
        height: Page height taken from ``page_size``.
        styles: Sample stylesheet extended with the ``CustomHeader``,
            ``CustomSubHeader`` and ``CustomBody`` paragraph styles.
    """

    # Alignment keywords for text elements -> paragraph alignment codes.
    _TEXT_ALIGNMENTS = {
        'left': 0, 'center': 1, 'right': 2, 'justify': 4,
        'c': 1, 'l': 0, 'r': 2, 'j': 4,
    }
    # Titles and headers accept the same keywords except justification.
    _HEADING_ALIGNMENTS = {
        'left': 0, 'center': 1, 'right': 2,
        'c': 1, 'l': 0, 'r': 2,
    }
    # Font family -> {(bold, italic): concrete font name}.
    _FONT_VARIANTS = {
        'helvetica': {
            (True, True): 'Helvetica-BoldOblique',
            (True, False): 'Helvetica-Bold',
            (False, True): 'Helvetica-Oblique',
            (False, False): 'Helvetica',
        },
        'times': {
            (True, True): 'Times-BoldItalic',
            (True, False): 'Times-Bold',
            (False, True): 'Times-Italic',
            (False, False): 'Times-Roman',
        },
        'courier': {
            (True, True): 'Courier-BoldOblique',
            (True, False): 'Courier-Bold',
            (False, True): 'Courier-Oblique',
            (False, False): 'Courier',
        },
    }
    # Matches markdown links of the form [label](target).
    _MARKDOWN_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')

    def __init__(self, page_size: str = "A4"):
        """Initialize the generator and its stylesheet.

        Args:
            page_size (str): ``"A4"`` (case-insensitive) selects A4; any
                other value selects letter.

        Raises:
            ImportError: If the PDF dependencies failed to import.
        """
        if not all([colors, canvas, SimpleDocTemplate]):
            raise ImportError("Required PDF generation dependencies not available")
        self.page_size = A4 if str(page_size).upper() == "A4" else letter
        self.width, self.height = self.page_size
        self.styles = getSampleStyleSheet()
        self.styles.add(ParagraphStyle(name='CustomHeader', parent=self.styles['Heading1'], fontSize=24, spaceAfter=30, alignment=1))
        self.styles.add(ParagraphStyle(name='CustomSubHeader', parent=self.styles['Heading2'], fontSize=18, spaceAfter=20))
        self.styles.add(ParagraphStyle(name='CustomBody', parent=self.styles['Normal'], fontSize=12, spaceAfter=12))

    def _parse_color(self, color_spec):
        """Internal helper: convert a color specification to a color object.

        Args:
            color_spec: Either a color name (red, blue, green, black, white,
                gray) or a sequence ``[r, g, b]`` / ``[r, g, b, a]`` with
                channels on a 0-255 scale.

        Returns:
            The matching color; black for anything unrecognised.
        """
        if isinstance(color_spec, str):
            color_map = {
                'red': colors.red,
                'blue': colors.blue,
                'green': colors.green,
                'black': colors.black,
                'white': colors.white,
                'gray': colors.gray,
            }
            return color_map.get(color_spec.strip().lower(), colors.black)
        if isinstance(color_spec, (list, tuple)) and len(color_spec) >= 3:
            r, g, b = color_spec[:3]
            # Honour the alpha channel whenever a fourth item exists, not
            # only when the sequence has exactly four items.
            a = color_spec[3] if len(color_spec) >= 4 else 255
            try:
                return colors.Color(r / 255.0, g / 255.0, b / 255.0, a / 255.0)
            except TypeError:
                # Non-numeric channels: fall back like an unknown name does.
                return colors.black
        return colors.black

    def _download_image(self, url):
        """Internal helper: fetch image bytes over HTTP.

        Args:
            url: URL string; it is validated by ``assert_safe_http_url``
                before any request is made.

        Returns:
            The response body as bytes, or ``None`` when the URL is rejected
            or the request fails.
        """
        from tools._safe_http import assert_safe_http_url
        try:
            url = assert_safe_http_url(str(url).strip())
        except ValueError as exc:
            logger.error("Blocked PDF image URL: %s", exc)
            return None
        try:
            with httpx.Client(timeout=10) as client:
                response = client.get(url)
                response.raise_for_status()
                return response.content
        except Exception as e:
            logger.error("Failed to download image from %s: %s", url, e)
            return None

    def _get_font_name(self, base_font, is_bold, is_italic):
        """Internal helper: resolve a font family plus flags to a font name.

        Args:
            base_font: Font family name; helvetica, times and courier are
                matched case-insensitively against the built-in table.
            is_bold: Truthy when a bold variant is wanted.
            is_italic: Truthy when an italic/oblique variant is wanted.

        Returns:
            The concrete font name. Unknown families are returned with a
            ``-Bold`` / ``-Italic`` / ``-BoldItalic`` suffix appended.
        """
        family = str(base_font)
        # Coerce to real booleans so truthy non-bool flags (e.g. "yes")
        # still hit the lookup keys instead of raising KeyError.
        bold, italic = bool(is_bold), bool(is_italic)
        variants = self._FONT_VARIANTS.get(family.lower())
        if variants is not None:
            return variants[(bold, italic)]
        if bold and italic:
            suffix = '-BoldItalic'
        elif bold:
            suffix = '-Bold'
        elif italic:
            suffix = '-Italic'
        else:
            suffix = ''
        return family + suffix

    def _create_text_element(self, content, style_spec):
        """Internal helper: create a paragraph (or link-aware list) for text.

        Args:
            content: Text to render; ``None`` is treated as empty text.
            style_spec: Element dict. Reads ``parse_markdown_links``,
                ``base_style``, ``size``/``font_size``, ``color``,
                ``font_style``, ``bold``, ``italic``, ``font`` and ``align``.

        Returns:
            A ``Paragraph``, or a list of flowables when
            ``parse_markdown_links`` is set.
        """
        content = '' if content is None else str(content)
        if style_spec.get('parse_markdown_links', False):
            return self._parse_markdown_links(content, style_spec)
        base_style_name = style_spec.get('base_style', 'Normal')
        if base_style_name not in self.styles:
            base_style_name = 'Normal'
        # Derive a private style instead of editing the stylesheet entry:
        # paragraphs keep a reference to their style, so in-place changes
        # would bleed into every other element that uses the same base style.
        style = ParagraphStyle(name=f'{base_style_name}Text', parent=self.styles[base_style_name])
        if 'size' in style_spec or 'font_size' in style_spec:
            style.fontSize = style_spec.get('size', style_spec.get('font_size', style.fontSize))
        if 'color' in style_spec:
            style.textColor = self._parse_color(style_spec['color'])
        font_style = str(style_spec.get('font_style', '') or '').upper()
        is_bold = style_spec.get('bold', False) or 'B' in font_style
        is_italic = style_spec.get('italic', False) or 'I' in font_style
        base_font = style_spec.get('font', 'Helvetica')
        style.fontName = self._get_font_name(base_font, is_bold, is_italic)
        if 'align' in style_spec:
            style.alignment = self._TEXT_ALIGNMENTS.get(str(style_spec['align']).lower(), 0)
        return Paragraph(content, style)

    def _parse_markdown_links(self, text, style_spec):
        """Internal helper: split text on markdown links into flowables.

        Args:
            text: Text that may contain ``[label](url)`` links.
            style_spec: Element dict; ``size``/``font_size`` sets the link
                font size.

        Returns:
            A list of flowables: ``Paragraph`` objects for non-blank plain
            text and ``LinkFlowable`` objects for links. If nothing was
            produced, a single paragraph holding the whole text.
        """
        normal = self.styles['Normal']
        # One style serves every link in this text; nothing mutates it later.
        link_style = ParagraphStyle(name='MarkdownLink', parent=normal, textColor=colors.blue, underline=True)
        if 'font_size' in style_spec or 'size' in style_spec:
            link_style.fontSize = style_spec.get('size', style_spec.get('font_size', link_style.fontSize))

        elements = []
        cursor = 0
        for match in self._MARKDOWN_LINK_RE.finditer(text):
            leading = text[cursor:match.start()]
            if leading.strip():
                elements.append(Paragraph(leading, normal))
            elements.append(LinkFlowable(match.group(1), match.group(2), link_style))
            cursor = match.end()
        trailing = text[cursor:]
        if trailing.strip():
            elements.append(Paragraph(trailing, normal))
        return elements or [Paragraph(text, normal)]

    def _create_title_element(self, title_spec):
        """Internal helper: create the document title paragraph.

        Args:
            title_spec: Element dict; reads ``text``, ``font_size``
                (default 28) and ``align`` (default centered).

        Returns:
            A ``Paragraph``, or ``None`` when ``text`` is empty.
        """
        text = title_spec.get('text', '')
        if not text:
            return None
        title_style = ParagraphStyle(
            name='Title',
            parent=self.styles['Heading1'],
            fontSize=title_spec.get('font_size', 28),
            spaceAfter=40,
            alignment=1,
            fontName='Helvetica-Bold',
        )
        if 'align' in title_spec:
            title_style.alignment = self._HEADING_ALIGNMENTS.get(str(title_spec['align']).lower(), 1)
        return Paragraph(text, title_style)

    def _create_header_element(self, header_spec):
        """Internal helper: create a section header paragraph.

        Args:
            header_spec: Element dict; reads ``text``, ``level`` (clamped
                to 1-6, default 1), ``font_size`` and ``align``.

        Returns:
            A ``Paragraph``, or ``None`` when ``text`` is empty.
        """
        text = header_spec.get('text', '')
        if not text:
            return None
        # JSON callers may send the level as a string; anything that cannot
        # be read as an integer falls back to the top level.
        try:
            level = int(header_spec.get('level', 1))
        except (TypeError, ValueError):
            level = 1
        level = max(1, min(6, level))
        style_map = {1: 'Heading1', 2: 'Heading2', 3: 'Heading3', 4: 'Heading4', 5: 'Heading5', 6: 'Normal'}
        header_style = ParagraphStyle(
            name=f'Header{level}',
            parent=self.styles[style_map.get(level, 'Heading1')],
            spaceAfter=20 - level * 2,
        )
        if 'font_size' in header_spec:
            header_style.fontSize = header_spec['font_size']
        if 'align' in header_spec:
            header_style.alignment = self._HEADING_ALIGNMENTS.get(str(header_spec['align']).lower(), 0)
        return Paragraph(text, header_style)

    def _create_link_element(self, link_spec):
        """Internal helper: create a standalone hyperlink flowable.

        Args:
            link_spec: Element dict; reads ``text``, ``url`` and
                ``font_size``.

        Returns:
            A ``LinkFlowable``, or ``None`` when text or URL is missing.
        """
        text = link_spec.get('text', '')
        url = link_spec.get('url', '')
        if not text or not url:
            return None
        link_style = ParagraphStyle(name='Link', parent=self.styles['Normal'], textColor=colors.blue, underline=True)
        if 'font_size' in link_spec:
            link_style.fontSize = link_spec['font_size']
        return LinkFlowable(text, url, link_style)

    def _create_image_element(self, image_spec):
        """Internal helper: create an image flowable, optionally captioned.

        Args:
            image_spec: Element dict. The image comes from the first present
                key of ``url``, ``base64`` or ``path``; ``width``,
                ``height`` and ``caption`` are optional.

        Returns:
            An ``Image``, a ``KeepTogether`` of image plus caption, or
            ``None`` when the image cannot be loaded.
        """
        try:
            image_data = None
            if 'url' in image_spec:
                image_data = self._download_image(image_spec['url'])
            elif 'base64' in image_spec:
                import base64
                image_data = base64.b64decode(image_spec['base64'])
            elif 'path' in image_spec:
                # NOTE(review): this reads an arbitrary local path taken
                # from the specification; confirm that callers are trusted
                # or restrict the allowed locations.
                with open(image_spec['path'], 'rb') as f:
                    image_data = f.read()
            if not image_data:
                return None

            # Re-encode to PNG in memory. The previous temp-file approach
            # deleted the file before the document was built, so the image
            # could no longer be read when the story was rendered.
            png_buffer = io.BytesIO()
            with PILImage.open(io.BytesIO(image_data)) as pil_image:
                pil_image.save(png_buffer, 'PNG')
            png_buffer.seek(0)

            img = Image(png_buffer)
            if 'width' in image_spec:
                img.drawWidth = image_spec['width']
            if 'height' in image_spec:
                img.drawHeight = image_spec['height']
            caption = image_spec.get('caption')
            if caption:
                caption_style = ParagraphStyle(name='ImageCaption', parent=self.styles['Normal'], fontSize=10, spaceBefore=5, fontName='Helvetica-Oblique')
                return KeepTogether([img, Paragraph(caption, caption_style)])
            return img
        except Exception as e:
            logger.error("Failed to create image element: %s", e)
            return None

    def _create_table_element(self, table_spec):
        """Internal helper: create a styled table flowable.

        Args:
            table_spec: Element dict; reads ``headers`` and ``data``. When
                no headers are given, the first data row is used as headers.

        Returns:
            A ``Table``, or ``None`` when there is nothing to show or table
            creation fails.
        """
        try:
            headers = table_spec.get('headers', [])
            data = table_spec.get('data', [])
            if not headers and not data:
                return None
            if not headers and data:
                headers = data[0]
                data = data[1:] if len(data) > 1 else []
            table_data = [headers] + data if headers else data
            table = Table(table_data)
            style_commands = []
            if headers:
                style_commands.extend([
                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                    ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                    ('FONTSIZE', (0, 0), (-1, 0), 14),
                    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                    ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                ])
            style_commands.append(('GRID', (0, 0), (-1, -1), 1, colors.black))
            table.setStyle(TableStyle(style_commands))
            return table
        except Exception as e:
            logger.error("Failed to create table element: %s", e)
            return None

    def generate_pdf(self, pdf_spec):
        """Generate a PDF document from a specification.

        Args:
            pdf_spec: Dict with a ``pages`` list; each page is a dict with
                an ``elements`` list of element dicts keyed by ``type``.

        Returns:
            The PDF as bytes, or ``None`` if generation fails (the error is
            logged).
        """
        try:
            buffer = io.BytesIO()
            doc = SimpleDocTemplate(buffer, pagesize=self.page_size, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
            story = []
            pages = pdf_spec.get('pages', [{"elements": []}])
            # Element types that map to one optional flowable each.
            single_creators = {
                'title': self._create_title_element,
                'header': self._create_header_element,
                'image': self._create_image_element,
                'table': self._create_table_element,
                'link': self._create_link_element,
            }
            last_index = len(pages) - 1
            for page_index, page_spec in enumerate(pages):
                for element in page_spec.get('elements', []):
                    element_type = str(element.get('type', '')).lower()
                    if element_type == 'newpage':
                        # An explicit break gets no trailing spacer.
                        story.append(PageBreak())
                        continue
                    if element_type in ('text', 'paragraph'):
                        text_elem = self._create_text_element(element.get('text', element.get('content', '')), element)
                        if isinstance(text_elem, list):
                            story.extend(text_elem)
                        else:
                            story.append(text_elem)
                    elif element_type == 'spacer':
                        story.append(Spacer(1, element.get('height', 12)))
                    elif element_type in single_creators:
                        elem = single_creators[element_type](element)
                        if elem is not None:
                            story.append(elem)
                    story.append(Spacer(1, 12))
                # Compare positions, not contents: a page whose dict equals
                # the last page's dict must still be followed by a break.
                if page_index < last_index:
                    story.append(PageBreak())
            doc.build(story)
            pdf_content = buffer.getvalue()
            buffer.close()
            return pdf_content
        except Exception as e:
            logger.error(f"Failed to generate PDF: {e}", exc_info=True)
            return None
def _validate_pdf_spec(pdf_spec):
"""Internal helper: validate pdf spec.
Args:
pdf_spec: The pdf spec value.
"""
if not isinstance(pdf_spec, dict):
return False, "PDF specification must be a dictionary"
if 'elements' in pdf_spec:
pdf_spec['pages'] = [{"elements": pdf_spec.pop('elements')}]
if 'pages' not in pdf_spec:
return False, "PDF specification must contain 'pages' key"
pages = pdf_spec['pages']
if not isinstance(pages, list):
return False, "'pages' must be a list"
supported = {'title', 'header', 'paragraph', 'text', 'image', 'table', 'spacer', 'newpage', 'link'}
for i, page in enumerate(pages):
if not isinstance(page, dict) or 'elements' not in page:
return False, f"Page {i} must be a dictionary with 'elements' key"
for j, element in enumerate(page['elements']):
if not isinstance(element, dict) or 'type' not in element:
return False, f"Element {j} on page {i} must have 'type' key"
if element['type'].lower() not in supported:
return False, f"Unsupported element type '{element['type']}'"
return True, "Valid"
# Identifier under which this tool is exposed.
TOOL_NAME = "generate_pdf"

# Human-readable summary of what the tool does and returns.
TOOL_DESCRIPTION = (
    "Generate a PDF from a JSON specification "
    "with pages containing text, titles, headers, images, tables, links, "
    "spacers, and page breaks. "
    "Returns the file path to the generated PDF."
)

# JSON-schema style description of the arguments accepted by ``run``.
TOOL_PARAMETERS = dict(
    type="object",
    properties=dict(
        pdf_spec=dict(
            type="string",
            description=(
                "JSON string containing PDF specification with 'pages' array. "
                "Element types: title, header, paragraph/text, image, "
                "table, spacer, link, newpage."
            ),
        ),
        filename=dict(
            type="string",
            description="Optional filename for the PDF (defaults to auto-generated).",
        ),
    ),
    required=["pdf_spec"],
)
async def run(pdf_spec: str, filename: str = None, ctx: ToolContext | None = None) -> str:
    """Generate a PDF from a JSON specification and report where it was saved.

    The PDF is written into a freshly created temporary directory, which is
    left in place so the caller can pick the file up.

    Args:
        pdf_spec (str): JSON text describing the document.
        filename (str): Optional file name for the PDF. Only the final path
            component is used; ``.pdf`` is appended when missing. When
            omitted, a name is derived from the specification's ``title``.
        ctx (ToolContext | None): Tool execution context (not used here).

    Returns:
        str: ``"PDF generated: <path> (<size> bytes)"`` on success, or a
        string starting with ``"ERROR:"`` describing the failure.
    """
    try:
        try:
            # Large payloads are parsed in a worker thread so the event
            # loop is not blocked while decoding.
            if len(pdf_spec) >= 256 * 1024:
                spec_dict = await asyncio.to_thread(json.loads, pdf_spec)
            else:
                spec_dict = json.loads(pdf_spec)
        except json.JSONDecodeError as e:
            return f"ERROR: Invalid JSON specification: {e}"

        is_valid, error_msg = _validate_pdf_spec(spec_dict)
        if not is_valid:
            return f"ERROR: Invalid PDF specification: {error_msg}"

        if filename:
            # Keep only the last path component so a name such as
            # "../../x.pdf" or an absolute path cannot escape the temp
            # directory. Backslashes are treated as separators as well.
            filename = os.path.basename(str(filename).replace('\\', '/'))
            if filename in ('', '.', '..'):
                filename = None
        if not filename:
            title = spec_dict.get('title', 'generated_pdf')
            if not isinstance(title, str):
                title = 'generated_pdf'
            safe_title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
            # A title made only of stripped characters would otherwise
            # produce a name starting with "_".
            filename = f"{safe_title or 'generated_pdf'}_{uuid.uuid4().hex[:8]}.pdf"
        if not filename.lower().endswith('.pdf'):
            filename += '.pdf'
        page_size = spec_dict.get('page_size', 'A4')

        def _build_pdf() -> tuple[str, int]:
            """Internal helper: render the PDF and write it to disk.

            Returns:
                tuple[str, int]: The file path and the PDF size in bytes.

            Raises:
                RuntimeError: If the generator produced no content.
            """
            generator = PDFGenerator(page_size=page_size)
            pdf_content = generator.generate_pdf(spec_dict)
            if not pdf_content:
                raise RuntimeError("Failed to generate PDF content")
            tmp_dir = tempfile.mkdtemp()
            filepath = os.path.join(tmp_dir, filename)
            with open(filepath, 'wb') as f:
                f.write(pdf_content)
            return filepath, len(pdf_content)

        # Rendering and file I/O are blocking, so run them off the loop.
        filepath, size = await asyncio.to_thread(_build_pdf)
        return f"PDF generated: {filepath} ({size} bytes)"
    except Exception as e:
        logger.error(f"Unexpected error in PDF generation: {e}", exc_info=True)
        return f"ERROR: Unexpected error occurred: {e}"