#!/usr/bin/env python3
# folder_to_pdf_with_bookmarks.py
import io
import os
import sys
from pathlib import Path

from PIL import Image, ImageOps
from PyPDF2 import PdfWriter, PdfReader
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas
def add_image_page(writer, image_path):
    """Append one image to *writer* as a single PDF page.

    The image is rotated according to its EXIF orientation and scaled down
    (never up) so that it fits within a US Letter page, treating one pixel
    as one PDF point. The generated page is exactly the size of the scaled
    image.

    Args:
        writer: PyPDF2 ``PdfWriter`` that receives the new page.
        image_path: Path of the image file to add.

    Returns:
        True if a page was added. False if the file could not be processed;
        in that case a warning is printed and the file is skipped.
    """
    try:
        # The context manager closes the underlying file handle even when
        # rendering fails part-way through.
        with Image.open(image_path) as source:
            upright = ImageOps.exif_transpose(source)
            img_width_px, img_height_px = upright.size
            max_w, max_h = letter
            # Cap the factor at 1.0 so small images are not enlarged.
            scale = min(max_w / img_width_px, max_h / img_height_px, 1.0)
            pdf_w = img_width_px * scale
            pdf_h = img_height_px * scale

            buffer = io.BytesIO()
            c = canvas.Canvas(buffer, pagesize=(pdf_w, pdf_h))
            # Draw the transposed in-memory image, not the file on disk:
            # the page dimensions above were computed from the transposed
            # image, so drawing the raw file would leave EXIF-rotated
            # photos unrotated and stretched.
            c.drawImage(ImageReader(upright), 0, 0, width=pdf_w, height=pdf_h)
            c.showPage()
            c.save()

        buffer.seek(0)
        reader = PdfReader(buffer)
        writer.add_page(reader.pages[0])
        return True
    except Exception as e:
        # Deliberate best-effort: one unreadable image must not abort the
        # whole merge.
        print(f"⚠️ 跳过无法处理的文件: {image_path} ({e})")
        return False
def build_pdf_with_bookmarks(root_folder: Path, output_pdf: Path, *,
                             include_file_bookmarks: bool = False):
    """Merge every supported file under *root_folder* into one bookmarked PDF.

    The folder tree is walked depth-first, with the entries of each directory
    in sorted path order. JPEG/PNG images become one page each and PDF files
    contribute all of their pages. The outline (bookmarks) mirrors the
    directory structure: the root folder is the top-level bookmark and every
    sub-directory that contributed at least one page gets a nested bookmark
    pointing at the first page of its subtree.

    Entries whose name starts with ``.`` are ignored, as is *output_pdf*
    itself when it already exists inside *root_folder*.

    Args:
        root_folder: Directory to scan.
        output_pdf: Destination file; overwritten if it exists.
        include_file_bookmarks: When true, each file also gets a bookmark
            (titled with its stem) nested under its directory.

    Returns:
        None. Progress and problems are reported on stdout; nothing is
        written when no page could be generated.
    """
    supported_ext = {'.jpg', '.jpeg', '.png', '.pdf'}
    # A previous run may have left the output inside the input tree; merging
    # it back in would duplicate every page.
    output_resolved = output_pdf.resolve()
    file_entries = []  # (path, level, title, typ) in depth-first order

    def walk(current_path: Path, level=0):
        """Record directories and supported files below *current_path*."""
        try:
            for item in sorted(current_path.iterdir()):
                if item.name.startswith('.'):
                    continue
                if item.is_dir():
                    file_entries.append((item, level, item.name, 'dir'))
                    walk(item, level + 1)
                elif item.suffix.lower() in supported_ext:
                    # Compare names first so resolve() only runs on the one
                    # plausible candidate.
                    if (item.name == output_pdf.name
                            and item.resolve() == output_resolved):
                        continue
                    file_entries.append((item, level, item.stem, 'file'))
        except PermissionError:
            # Unreadable directories are skipped rather than aborting the run.
            pass

    walk(root_folder)

    # Bail out early when the tree holds nothing we can convert.
    if not any(typ == 'file' for _, _, _, typ in file_entries):
        print("❌ 没有找到支持的文件(.jpg/.jpeg/.png/.pdf)")
        return

    # === Phase 1: build the page content and note each file's first page ===
    writer = PdfWriter()
    current_page = 0
    file_page_map = {}  # path -> index of the file's first page (ascending)
    for path, _level, _title, typ in file_entries:
        if typ != 'file':
            continue
        start_page = current_page
        if path.suffix.lower() == '.pdf':
            try:
                reader = PdfReader(path)
                for page in reader.pages:
                    writer.add_page(page)
                    current_page += 1
            except Exception as e:
                print(f"⚠️ 跳过损坏的 PDF: {path} ({e})")
            # Map the file only if it really contributed pages: a zero-page
            # PDF must not point at a page that belongs to the next file (or
            # does not exist), while pages salvaged before a read error are
            # already in the output and still deserve a target.
            if current_page > start_page:
                file_page_map[path] = start_page
        elif add_image_page(writer, str(path)):
            file_page_map[path] = start_page
            current_page += 1

    if current_page == 0:
        print("❌ 未生成任何有效页面")
        return

    # === Phase 2: each directory points at the earliest page in its subtree ===
    # file_page_map is in ascending page order, so the first file that claims
    # an ancestor directory is the earliest one below it.
    dir_page_map = {}
    for file_path, page_num in file_page_map.items():
        for ancestor in file_path.parents:
            if ancestor == root_folder:
                break
            dir_page_map.setdefault(ancestor, page_num)
    # The root bookmark always targets the first mapped page. file_page_map
    # cannot be empty here because at least one page was generated.
    dir_page_map[root_folder] = min(file_page_map.values())

    # === Phase 3: bookmark entries as (title, outline level, page index) ===
    # The root sits at level 0, so everything found by walk() is shifted down
    # by one. A root such as "/" has an empty name; fall back to the path.
    bookmark_entries = [
        (root_folder.name or str(root_folder), 0, dir_page_map[root_folder])
    ]
    for path, level, title, typ in file_entries:
        if typ == 'dir':
            # Directories without any page are left out of the outline.
            if path in dir_page_map:
                bookmark_entries.append((title, level + 1, dir_page_map[path]))
        elif include_file_bookmarks and path in file_page_map:
            bookmark_entries.append((title, level + 1, file_page_map[path]))

    # === Phase 4: attach the outline and write the result ===
    # Outline items go straight onto the writer that holds the pages; no
    # intermediate file is needed.
    parents = {}  # outline level -> most recent outline item at that level
    for title, level, page_idx in bookmark_entries:
        parent = parents.get(level - 1)
        try:
            bookmark = writer.add_outline_item(title, page_idx, parent=parent)
        except Exception as e:
            print(f"⚠️ 书签添加失败: {title} ({e})")
            # Hoist this entry's children to its own parent instead of
            # attaching them to a stale sibling.
            bookmark = parent
        # Items at this level or deeper belong to an earlier branch.
        for stale in [lvl for lvl in parents if lvl >= level]:
            del parents[stale]
        parents[level] = bookmark

    with open(output_pdf, "wb") as f:
        writer.write(f)
    print(f"✅ 已生成带完整目录结构书签的 PDF: {output_pdf}")
def main(argv=None):
    """Command-line entry point: validate arguments and build the PDF.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit status: 1 on a usage error or when the input folder is
        missing, 0 otherwise.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        print("用法: python3 folder_to_pdf_with_bookmarks.py <输入文件夹> <输出.pdf>")
        print("示例: python3 folder_to_pdf_with_bookmarks.py ./my_book ./output.pdf")
        return 1

    input_folder = Path(args[1]).resolve()
    output_file = Path(args[2]).resolve()

    # is_dir() rather than exists(): a path to a regular file would pass an
    # existence check and then crash while listing its "children".
    if not input_folder.is_dir():
        print(f"❌ 输入文件夹不存在: {input_folder}")
        return 1

    # Only a hint; the output is still written under the name given.
    if output_file.suffix.lower() != '.pdf':
        print("⚠️ 输出文件建议使用 .pdf 扩展名")

    build_pdf_with_bookmarks(input_folder, output_file)
    return 0


if __name__ == "__main__":
    sys.exit(main())
# Last modified: 2025-12-17
# © Reposting permitted with proper attribution