import io
import os

import fitz  # PyMuPDF
from PIL import Image


def extract_images_from_pdf(pdf_path, output_folder):
    """Extract every embedded image of a PDF into ``output_folder``.

    Each image is written as ``page_<P>_img_<I>.<ext>``, where ``P`` and
    ``I`` are 1-based page and per-page image numbers and ``ext`` is the
    extension reported by PyMuPDF for that image. The path of every saved
    file is printed.

    Args:
        pdf_path: Path of the PDF file to read.
        output_folder: Directory that receives the image files; it is
            created if it does not exist yet.
    """
    # Make sure the output folder exists.
    os.makedirs(output_folder, exist_ok=True)

    # The context manager closes the document even if a page or image fails.
    with fitz.open(pdf_path) as document:
        for page_number, page in enumerate(document, start=1):
            # full=True returns the complete image entries; item 0 is the xref.
            page_images = page.get_images(full=True)
            for image_index, img in enumerate(page_images, start=1):
                xref = img[0]

                # Pull the embedded image data and its file extension.
                base_image = document.extract_image(xref)
                image_bytes = base_image["image"]
                image_ext = base_image["ext"]

                # Build the output file name.
                image_filename = f"page_{page_number}_img_{image_index}.{image_ext}"
                image_path = os.path.join(output_folder, image_filename)

                try:
                    # Decode and re-save the image through Pillow.
                    with Image.open(io.BytesIO(image_bytes)) as image:
                        image.save(image_path)
                except (OSError, ValueError):
                    # Pillow could not read the data or write this
                    # extension; keep the image by writing the extracted
                    # bytes unchanged instead of aborting the whole run.
                    with open(image_path, "wb") as raw_file:
                        raw_file.write(image_bytes)

                print(f"保存图片: {image_path}")


# Usage example
pdf_path = "/Users/xiangyu/Documents/余氏宗谱(新洲区等支族)/第一册/(5)余氏彩页 P17-40.pdf"  # Replace with the path to your PDF file
output_folder = "extracted_images"  # Replace with the folder where images should be saved

if __name__ == "__main__":
    extract_images_from_pdf(pdf_path, output_folder)