40 lines
1.4 KiB
Python
40 lines
1.4 KiB
Python
import fitz # PyMuPDF
|
||
import os
|
||
from PIL import Image
|
||
import io
|
||
|
||
def extract_images_from_pdf(pdf_path, output_folder):
    """Extract the embedded images of a PDF into ``output_folder``.

    Each image is written as ``page_<P>_img_<I>.<ext>``, where ``P`` is the
    1-based page number, ``I`` is the 1-based position of the image in that
    page's image list, and ``ext`` is the file extension PyMuPDF reports for
    the image. The path of every written file is printed as it is saved.

    The image bytes returned by PyMuPDF are written to disk unchanged, so
    the stored images are not recompressed and formats that Pillow cannot
    write do not cause a failure.

    Args:
        pdf_path: Path of the PDF file to read.
        output_folder: Directory that receives the image files; it is
            created if it does not exist.

    Returns:
        list[str]: Paths of the files written, in extraction order.
    """
    saved_paths = []

    # The context manager closes the document even when extraction or
    # writing raises part-way through.
    with fitz.open(pdf_path) as document:
        # Make sure the output folder exists.
        os.makedirs(output_folder, exist_ok=True)

        # Walk every page, numbering pages from 1 for the file names.
        for page_number, page in enumerate(document, start=1):
            # Images referenced by this page.
            page_images = page.get_images(full=True)

            for image_index, img in enumerate(page_images, start=1):
                # The first field of each entry is the image's xref.
                xref = img[0]
                base_image = document.extract_image(xref)
                if not base_image:
                    # NOTE(review): presumably returned for an xref that is
                    # not an extractable image -- confirm against the
                    # PyMuPDF docs. The index is not reused, so the names
                    # of the remaining images stay stable.
                    continue

                image_filename = (
                    f"page_{page_number}_img_{image_index}.{base_image['ext']}"
                )
                image_path = os.path.join(output_folder, image_filename)

                # Write the extracted bytes as-is instead of decoding and
                # re-encoding them.
                with open(image_path, "wb") as image_file:
                    image_file.write(base_image["image"])

                print(f"保存图片: {image_path}")
                saved_paths.append(image_path)

    return saved_paths
|
||
|
||
# Example usage.
# Replace with the path to your PDF file.
pdf_path = "/Users/xiangyu/Documents/余氏宗谱(新洲区等支族)/第一册/(5)余氏彩页 P17-40.pdf"
# Replace with the folder where the extracted images should be saved.
output_folder = "extracted_images"

extract_images_from_pdf(pdf_path=pdf_path, output_folder=output_folder)