#!/usr/bin/env python3
# folder_to_pdf_with_bookmarks.py
import os
import sys
from pathlib import Path
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from PIL import Image, ImageOps
from PyPDF2 import PdfWriter, PdfReader
import io

def add_image_page(writer, image_path):
    """Append one image to ``writer`` as a single PDF page.

    The image is oriented according to its EXIF Orientation tag and scaled
    down (never up) to fit within a US-letter page, treating one pixel as one
    PDF point. The page is sized to the scaled image.

    Args:
        writer: PyPDF2 ``PdfWriter`` that receives the new page.
        image_path: Path of the image file, as a string.

    Returns:
        True if a page was added; False if the file could not be processed
        (a warning is printed and the file is skipped).
    """
    try:
        # Close the source file deterministically. The transposed result is an
        # independent in-memory image, so it stays usable after the file closes.
        with Image.open(image_path) as original:
            orientation = original.getexif().get(0x0112, 1)  # EXIF Orientation
            img = ImageOps.exif_transpose(original)
        img_width_px, img_height_px = img.size

        max_w, max_h = letter
        scale = min(max_w / img_width_px, max_h / img_height_px, 1.0)
        pdf_w = img_width_px * scale
        pdf_h = img_height_px * scale

        # Orientations 2-8 mean exif_transpose actually flipped/rotated the
        # pixels. In that case the re-oriented image must be drawn; drawing the
        # file by path would place the unrotated pixels into the rotated page
        # box. Otherwise keep drawing by path so the file is embedded as before.
        if orientation in range(2, 9):
            from reportlab.lib.utils import ImageReader
            source = ImageReader(img)
        else:
            source = image_path

        buffer = io.BytesIO()
        c = canvas.Canvas(buffer, pagesize=(pdf_w, pdf_h))
        c.drawImage(source, 0, 0, width=pdf_w, height=pdf_h)
        c.showPage()
        c.save()

        buffer.seek(0)
        reader = PdfReader(buffer)
        writer.add_page(reader.pages[0])
        return True
    except Exception as e:
        # Best effort: one unreadable image must not abort the whole build.
        print(f"⚠️ 跳过无法处理的文件: {image_path} ({e})")
        return False

def _collect_entries(root_folder, skip_path):
    """Walk ``root_folder`` depth-first and list directories and supported files.

    Returns a list of ``(path, level, title, typ)`` tuples in sorted traversal
    order, where ``typ`` is ``'dir'`` or ``'file'`` and ``level`` is the depth
    below ``root_folder`` (0 for its direct children). Hidden entries (names
    starting with ``.``) and the file located at ``skip_path`` are left out.
    """
    supported_ext = {'.jpg', '.jpeg', '.png', '.pdf'}
    entries = []

    def walk(current_path, level):
        try:
            for item in sorted(current_path.iterdir()):
                if item.name.startswith('.'):
                    continue
                if item.is_dir():
                    entries.append((item, level, item.name, 'dir'))
                    walk(item, level + 1)
                elif item.suffix.lower() in supported_ext:
                    # Never feed the output file (e.g. from a previous run
                    # into the same tree) back in as input.
                    if item.name == skip_path.name and item.resolve() == skip_path:
                        continue
                    entries.append((item, level, item.stem, 'file'))
        except PermissionError:
            print(f"⚠️ 无权限访问,已跳过: {current_path}")

    walk(root_folder, 0)
    return entries


def build_pdf_with_bookmarks(root_folder: Path, output_pdf: Path, *,
                             include_file_bookmarks: bool = False):
    """Merge every image/PDF under ``root_folder`` into one bookmarked PDF.

    Files are appended in sorted, depth-first order. The outline contains one
    entry for the root folder and one for every sub-directory whose subtree
    contributed at least one page; each entry points at the first page coming
    from that subtree.

    Args:
        root_folder: Directory to scan for ``.jpg``/``.jpeg``/``.png``/``.pdf``.
        output_pdf: Destination file. If it lies inside ``root_folder`` it is
            excluded from the scan.
        include_file_bookmarks: When True, also add one outline entry per
            file, nested under its directory. Defaults to False.

    Returns:
        None. Progress and problems are reported with ``print``; when nothing
        usable is found the function returns without writing ``output_pdf``.
    """
    file_entries = _collect_entries(root_folder, output_pdf.resolve())

    if not any(typ == 'file' for _, _, _, typ in file_entries):
        print("❌ 没有找到支持的文件(.jpg/.jpeg/.png/.pdf)")
        return

    # === Phase 1: append pages and record each file's first page index ===
    writer = PdfWriter()
    current_page = 0
    file_page_map = {}  # path -> start_page_index
    for path, _level, _title, typ in file_entries:
        if typ != 'file':
            continue
        start_page = current_page
        if path.suffix.lower() == '.pdf':
            try:
                for page in PdfReader(path).pages:
                    writer.add_page(page)
                    current_page += 1
            except Exception as e:
                print(f"⚠️ 跳过损坏的 PDF: {path} ({e})")
        elif add_image_page(writer, str(path)):
            current_page += 1
        # Only files that really contributed pages get a start page; a
        # zero-page PDF would otherwise claim an index it does not own
        # (possibly one past the last page).
        if current_page > start_page:
            file_page_map[path] = start_page

    if current_page == 0:
        print("❌ 未生成任何有效页面")
        return

    # === Phase 2: a directory starts at the earliest page in its subtree ===
    known_dirs = {path for path, _, _, typ in file_entries if typ == 'dir'}
    dir_page_map = {root_folder: min(file_page_map.values())}
    # file_page_map was filled in page order, so the first file seen under a
    # directory is the earliest one and setdefault keeps it.
    for file_path, page_num in file_page_map.items():
        for ancestor in file_path.parents:
            if ancestor == root_folder:
                break
            if ancestor in known_dirs:
                dir_page_map.setdefault(ancestor, page_num)

    # === Phase 3: bookmark entries as (title, level, page), root first ===
    bookmark_entries = [
        (root_folder.name or str(root_folder), 0, dir_page_map[root_folder])
    ]
    for path, level, title, typ in file_entries:
        if typ == 'dir':
            if path in dir_page_map:
                bookmark_entries.append((title, level + 1, dir_page_map[path]))
        elif include_file_bookmarks and path in file_page_map:
            bookmark_entries.append((title, level + 1, file_page_map[path]))

    # === Phase 4: rebuild the document and attach the outline ===
    # Round-trip through memory instead of a fixed "temp_output.pdf" on disk,
    # which could overwrite an unrelated file and was left behind on failure.
    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)
    final_writer = PdfWriter()
    for page in PdfReader(buffer).pages:
        final_writer.add_page(page)

    # parent_stack[i] is the outline item children at level i + 1 attach to.
    # Every entry pushes exactly one slot, and levels grow by at most one
    # between consecutive entries, so after truncation the last slot (if any)
    # is always the parent of the current entry.
    parent_stack = []
    for title, level, page_idx in bookmark_entries:
        del parent_stack[level:]
        parent = parent_stack[-1] if parent_stack else None
        try:
            bookmark = final_writer.add_outline_item(title, page_idx, parent=parent)
        except Exception as e:
            print(f"⚠️ 书签添加失败: {title} ({e})")
            # Let the children of a failed entry attach to its parent rather
            # than to a stale sibling.
            bookmark = parent
        parent_stack.append(bookmark)

    output_pdf.parent.mkdir(parents=True, exist_ok=True)
    with open(output_pdf, "wb") as f:
        final_writer.write(f)

    print(f"✅ 已生成带完整目录结构书签的 PDF: {output_pdf}")

if __name__ == "__main__":
    # Command-line entry point: <input folder> <output.pdf>.
    if len(sys.argv) != 3:
        print("用法: python3 folder_to_pdf_with_bookmarks.py <输入文件夹> <输出.pdf>")
        print("示例: python3 folder_to_pdf_with_bookmarks.py ./my_book ./output.pdf")
        sys.exit(1)
    input_folder = Path(sys.argv[1]).resolve()
    output_file = Path(sys.argv[2]).resolve()
    # is_dir() rather than exists(): a regular file given as the input would
    # pass an existence check and then crash when the folder is listed.
    if not input_folder.is_dir():
        print(f"❌ 输入文件夹不存在: {input_folder}")
        sys.exit(1)
    if output_file.suffix.lower() != '.pdf':
        # Only a hint; the build still proceeds with the given name.
        print("⚠️ 输出文件建议使用 .pdf 扩展名")
    build_pdf_with_bookmarks(input_folder, output_file)
# Last modified: 2025-12-17
# If you found this article useful, feel free to leave a tip.