"""PDF region detector.

Renders PDF pages to images, finds five rectangular regions of (nearly)
identical size on each page with OpenCV, shows them in a Tkinter viewer and
extracts the text inside each region into per-region text files.
"""

import datetime
import os
import tkinter as tk
import tkinter.messagebox as messagebox
from contextlib import ExitStack
from tkinter import ttk

import cv2
import fitz  # PyMuPDF
import numpy as np
from PIL import Image, ImageTk


class PDFProcessor:
    """Render PDF pages to images and locate same-sized rectangular regions."""

    # Zoom factor used when rasterising a page. Pixel coordinates of detected
    # regions must be divided by this value to get back to PDF coordinates.
    RENDER_SCALE = 2

    def __init__(self):
        self.image = None   # last rendered page as a BGR numpy array
        self.regions = []   # last detection result: list of (x, y, w, h)

    def convert_pdf_to_image(self, pdf_path, page_num=0):
        """Render one page of a PDF to an OpenCV (BGR) image.

        Args:
            pdf_path: Path of the PDF file to open.
            page_num: Zero-based page index.

        Returns:
            The rendered page as a BGR numpy array (also stored in
            ``self.image``).
        """
        # Use the document as a context manager so the file handle is
        # released after every render instead of leaking one per call.
        with fitz.open(pdf_path) as doc:
            page = doc[page_num]
            scale = self.RENDER_SCALE
            # Render at a higher resolution for better detection quality.
            pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        # Convert PIL (RGB) to OpenCV (BGR).
        self.image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        return self.image

    def detect_regions(self):
        """Find five rectangular regions of near-identical size.

        Candidates are bounding boxes of contours that are large enough and
        have a moderate aspect ratio. Starting from the widest candidate, the
        first group of at least five boxes whose width and height differ by
        less than 2% is selected; its five top-most members are returned.

        Returns:
            A list of up to five ``(x, y, w, h)`` tuples sorted top to bottom,
            or an empty list when no image is loaded or no group is found.
            The result is also stored in ``self.regions``.
        """
        if self.image is None:
            return []

        gray = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
        # Gaussian blur to reduce noise before thresholding.
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        binary = cv2.adaptiveThreshold(
            blurred,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )

        # Dilate so that slightly broken borders become closed contours.
        kernel = np.ones((3, 3), np.uint8)
        dilated = cv2.dilate(binary, kernel, iterations=1)

        contours, _hierarchy = cv2.findContours(
            dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )

        # Collect every plausible rectangular candidate.
        candidates = []
        min_area = 1000
        for cnt in contours:
            peri = cv2.arcLength(cnt, True)
            approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
            x, y, w, h = cv2.boundingRect(cnt)
            area = w * h
            aspect_ratio = w / float(h) if h != 0 else 0

            big_enough = area > min_area and w > 30 and h > 30
            if big_enough and 0.1 < aspect_ratio < 10 and len(approx) >= 4:
                candidates.append((x, y, w, h))

        # Widest candidates first.
        candidates.sort(key=lambda box: box[2], reverse=True)

        target_regions = []
        for i, (x1, y1, w1, h1) in enumerate(candidates):
            similar_regions = [(x1, y1, w1, h1)]

            # Look among the remaining (narrower or equal) candidates for
            # boxes whose width and height are within 2% of this one.
            for x2, y2, w2, h2 in candidates[i + 1:]:
                if (abs(w2 - w1) / w1 < 0.02
                        and abs(h2 - h1) / h1 < 0.02):
                    similar_regions.append((x2, y2, w2, h2))

            if len(similar_regions) >= 5:
                # Top to bottom, keep the first five.
                similar_regions.sort(key=lambda box: box[1])
                target_regions = similar_regions[:5]
                break

        self.regions = target_regions
        return self.regions


class PDFViewer:
    """Tkinter viewer that shows a PDF page with its detected regions.

    Provides zooming, scrolling, dragging, page navigation and a button that
    extracts the text of the five detected regions of every page into five
    text files under ``output/``.
    """

    MIN_ZOOM_PERCENT = 10
    MAX_ZOOM_PERCENT = 200
    REGION_COUNT = 5

    def __init__(self, root):
        """Build the UI on ``root`` and try to load the default ``test.pdf``."""
        self.root = root
        self.root.title("PDF区域检测器")
        self.root.geometry("1024x768")

        # Initialise state BEFORE creating widgets: setting the zoom scale
        # below may invoke its command callback, which reads these
        # attributes.
        self.processor = PDFProcessor()
        self.current_image = None
        self.current_regions = None
        self.zoom_factor = 0.3  # initial zoom of 30%
        self.photo = None       # keeps a reference so Tk does not drop the image
        self.pdf_path = None
        self.doc = None
        self.total_pages = 1
        self.current_page = tk.StringVar(value="1")

        self.main_frame = ttk.Frame(self.root)
        self.main_frame.pack(fill=tk.BOTH, expand=True)

        self._build_toolbar()
        self._build_canvas()
        self._bind_events()

        # Load the default PDF file.
        default_pdf = "test.pdf"
        try:
            self.load_pdf(default_pdf)
        except Exception as e:
            messagebox.showerror("错误", f"无法加载默认PDF文件:{str(e)}")

    def _build_toolbar(self):
        """Create the toolbar: zoom slider, fit button, paging, extract button."""
        self.toolbar = ttk.Frame(self.main_frame)
        self.toolbar.pack(side=tk.TOP, fill=tk.X)

        # Zoom control.
        self.zoom_label = ttk.Label(self.toolbar, text="缩放: ")
        self.zoom_label.pack(side=tk.LEFT, padx=5)

        self.zoom_scale = ttk.Scale(
            self.toolbar,
            from_=self.MIN_ZOOM_PERCENT,
            to=self.MAX_ZOOM_PERCENT,
            orient=tk.HORIZONTAL,
            length=200,
            command=self._on_zoom_change,
        )
        self.zoom_scale.set(30)  # initial zoom of 30%
        self.zoom_scale.pack(side=tk.LEFT, padx=5)

        # Fit-to-window button.
        self.fit_button = ttk.Button(
            self.toolbar,
            text="适应窗口",
            command=self._fit_to_window,
        )
        self.fit_button.pack(side=tk.LEFT, padx=5)

        # Page navigation.
        self.page_frame = ttk.Frame(self.toolbar)
        self.page_frame.pack(side=tk.LEFT, padx=5)

        self.page_label = ttk.Label(self.page_frame, text="页码:")
        self.page_label.pack(side=tk.LEFT)

        self.page_entry = ttk.Entry(
            self.page_frame, textvariable=self.current_page, width=5
        )
        self.page_entry.pack(side=tk.LEFT, padx=2)

        self.total_pages_label = ttk.Label(self.page_frame, text="/1")
        self.total_pages_label.pack(side=tk.LEFT)

        self.prev_button = ttk.Button(
            self.page_frame, text="上一页", command=self._prev_page
        )
        self.prev_button.pack(side=tk.LEFT, padx=2)

        self.next_button = ttk.Button(
            self.page_frame, text="下一页", command=self._next_page
        )
        self.next_button.pack(side=tk.LEFT, padx=2)

        # Text extraction button.
        self.confirm_button = ttk.Button(
            self.toolbar,
            text="提取所有页面文字",
            command=self._extract_text,
        )
        self.confirm_button.pack(side=tk.LEFT, padx=5)

    def _build_canvas(self):
        """Create the scrollable canvas that displays the page image."""
        self.frame = ttk.Frame(self.main_frame)
        self.frame.pack(fill=tk.BOTH, expand=True)

        self.v_scrollbar = ttk.Scrollbar(self.frame, orient="vertical")
        self.h_scrollbar = ttk.Scrollbar(self.frame, orient="horizontal")
        self.v_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.h_scrollbar.pack(side=tk.BOTTOM, fill=tk.X)

        self.canvas = tk.Canvas(
            self.frame,
            yscrollcommand=self.v_scrollbar.set,
            xscrollcommand=self.h_scrollbar.set,
            bg='gray90',  # background colour so the page edge is visible
        )
        self.canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        self.v_scrollbar.config(command=self.canvas.yview)
        self.h_scrollbar.config(command=self.canvas.xview)

    def _bind_events(self):
        """Attach mouse and resize handlers.

        NOTE(review): the event sequences in the previous revision were all
        empty strings, which is not a valid Tk event pattern. They have been
        reconstructed from what each handler does (``event.num`` 4/5 for X11
        wheel buttons, ``event.delta`` for ``<MouseWheel>``) - confirm they
        match the intended bindings.
        """
        # Vertical / horizontal wheel scrolling (Windows, macOS, Tk >= 8.7).
        self.canvas.bind('<MouseWheel>', self._on_mousewheel_y)
        self.canvas.bind('<Shift-MouseWheel>', self._on_mousewheel_x)
        # X11 reports the wheel as buttons 4 and 5.
        self.canvas.bind('<Button-4>', self._on_mousewheel_y)
        self.canvas.bind('<Button-5>', self._on_mousewheel_y)
        self.canvas.bind('<Shift-Button-4>', self._on_mousewheel_x)
        self.canvas.bind('<Shift-Button-5>', self._on_mousewheel_x)
        # Drag the canvas with the left mouse button.
        self.canvas.bind('<ButtonPress-1>', self._start_drag)
        self.canvas.bind('<B1-Motion>', self._drag)
        # Ctrl + wheel zooms.
        self.canvas.bind('<Control-MouseWheel>', self._on_zoom_wheel)
        # Keep the scroll region in sync with window size changes.
        self.root.bind('<Configure>', self._on_window_resize)

    def load_pdf(self, pdf_path):
        """Open ``pdf_path`` and display the currently selected page.

        Raises:
            Whatever ``fitz.open`` raises when the file cannot be opened; in
            that case the previously loaded document (if any) stays active.
        """
        # Open first so a failure leaves the viewer state untouched.
        new_doc = fitz.open(pdf_path)
        if self.doc is not None:
            self.doc.close()

        self.pdf_path = pdf_path
        self.doc = new_doc
        self.total_pages = len(self.doc)
        self.total_pages_label.configure(text=f"/{self.total_pages}")
        self._load_current_page()

    def _load_current_page(self):
        """Render the page named in the page entry and detect its regions.

        Page numbers outside the document are ignored; a non-numeric entry
        shows an error dialog.
        """
        try:
            page_num = int(self.current_page.get()) - 1
        except ValueError:
            messagebox.showerror("错误", "请输入有效的页码")
            return

        if 0 <= page_num < self.total_pages:
            image = self.processor.convert_pdf_to_image(self.pdf_path, page_num)
            regions = self.processor.detect_regions()
            self.current_image = image
            self.current_regions = regions
            self._update_display()

    def _prev_page(self):
        """Show the previous page."""
        try:
            current = int(self.current_page.get())
        except ValueError:
            # Non-numeric entry: deliberately ignore the button press.
            return
        if current > 1:
            self.current_page.set(str(current - 1))
            self._load_current_page()

    def _next_page(self):
        """Show the next page."""
        try:
            current = int(self.current_page.get())
        except ValueError:
            # Non-numeric entry: deliberately ignore the button press.
            return
        if current < self.total_pages:
            self.current_page.set(str(current + 1))
            self._load_current_page()

    def _update_display(self):
        """Redraw the page image and region overlays at the current zoom."""
        if self.current_image is None:
            return

        height, width = self.current_image.shape[:2]
        zoom = self.zoom_factor

        # Never ask cv2.resize for a zero-sized image.
        new_width = max(1, int(width * zoom))
        new_height = max(1, int(height * zoom))
        resized_image = cv2.resize(self.current_image, (new_width, new_height))

        # OpenCV (BGR) to PIL (RGB) to Tk image.
        image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
        image_pil = Image.fromarray(image)
        self.photo = ImageTk.PhotoImage(image=image_pil)

        self.canvas.delete("all")
        self.canvas.create_image(0, 0, image=self.photo, anchor=tk.NW)

        if self.current_regions:
            for number, (x, y, w, h) in enumerate(self.current_regions, start=1):
                tag = f"region_{number}"
                # Scale region coordinates and size to the zoomed image.
                scaled_x = int(x * zoom)
                scaled_y = int(y * zoom)
                scaled_w = int(w * zoom)
                scaled_h = int(h * zoom)

                rect_id = self.canvas.create_rectangle(
                    scaled_x, scaled_y,
                    scaled_x + scaled_w, scaled_y + scaled_h,
                    outline="red",
                    width=max(1, int(2 * zoom)),
                    tags=tag,
                )

                self.canvas.create_text(
                    scaled_x + scaled_w // 2,
                    scaled_y - 10 * zoom,
                    text=f"目标区域 {number} ({w}x{h})",
                    fill="red",
                    tags=tag,
                )

                # Highlight the rectangle while the pointer is over the
                # region. rect_id is bound as a default argument so each
                # lambda keeps its own rectangle.
                self.canvas.tag_bind(
                    tag,
                    '<Enter>',
                    lambda e, rid=rect_id: self._highlight_region(rid),
                )
                self.canvas.tag_bind(
                    tag,
                    '<Leave>',
                    lambda e, rid=rect_id: self._unhighlight_region(rid),
                )

        self.canvas.config(scrollregion=self.canvas.bbox(tk.ALL))

    def _on_zoom_wheel(self, event):
        """Zoom in or out by 10 percentage points on Ctrl + mouse wheel."""
        current = self.zoom_scale.get()
        if event.delta > 0:
            self.zoom_scale.set(min(self.MAX_ZOOM_PERCENT, current + 10))
        else:
            self.zoom_scale.set(max(self.MIN_ZOOM_PERCENT, current - 10))

    def _on_zoom_change(self, value):
        """Apply a new zoom value (in percent) coming from the zoom scale."""
        self.zoom_factor = float(value) / 100
        self._update_display()

    def _on_mousewheel_y(self, event):
        """Scroll vertically on mouse wheel events."""
        if event.num == 4 or event.delta > 0:
            self.canvas.yview_scroll(-1, "units")
        elif event.num == 5 or event.delta < 0:
            self.canvas.yview_scroll(1, "units")

    def _on_mousewheel_x(self, event):
        """Scroll horizontally on mouse wheel events."""
        if event.num == 4 or event.delta > 0:
            self.canvas.xview_scroll(-1, "units")
        elif event.num == 5 or event.delta < 0:
            self.canvas.xview_scroll(1, "units")

    def _start_drag(self, event):
        """Remember the drag start position."""
        self.canvas.scan_mark(event.x, event.y)

    def _drag(self, event):
        """Drag the canvas content with the pointer."""
        self.canvas.scan_dragto(event.x, event.y, gain=1)

    def _highlight_region(self, region_id):
        """Draw the given rectangle thicker and in yellow."""
        self.canvas.itemconfig(
            region_id,
            width=max(2, int(3 * self.zoom_factor)),
            outline="yellow",
        )

    def _unhighlight_region(self, region_id):
        """Restore the given rectangle to its normal red outline."""
        self.canvas.itemconfig(
            region_id,
            width=max(1, int(2 * self.zoom_factor)),
            outline="red",
        )

    def _fit_to_window(self):
        """Choose a zoom at which the whole page fits in the canvas."""
        if self.current_image is None:
            return

        window_width = self.canvas.winfo_width()
        window_height = self.canvas.winfo_height()
        image_height, image_width = self.current_image.shape[:2]

        width_ratio = window_width / image_width
        height_ratio = window_height / image_height

        # Use the smaller ratio so the page is fully visible; keep a margin.
        new_zoom = min(width_ratio, height_ratio) * 0.9

        # Stay inside the range the zoom slider can represent.
        percent = min(
            self.MAX_ZOOM_PERCENT,
            max(self.MIN_ZOOM_PERCENT, new_zoom * 100),
        )
        self.zoom_scale.set(percent)

    def _on_window_resize(self, event):
        """Refresh the scroll region when the main window is resized."""
        # <Configure> is also delivered for child widgets; only react to
        # events that come from the main window itself.
        if event.widget == self.root:
            self.canvas.config(scrollregion=self.canvas.bbox(tk.ALL))

    def _extract_text(self):
        """Extract the text of the five regions on every page.

        For each page the regions are re-detected; pages with fewer than five
        regions are skipped with a warning. The text of region *k* (counted
        top to bottom) is appended to ``output/<pdf name>_region<k>.txt``.
        """
        if not self.doc:
            messagebox.showwarning("警告", "请先加载PDF文件!")
            return

        region_count = self.REGION_COUNT
        scale = PDFProcessor.RENDER_SCALE

        try:
            output_dir = "output"
            os.makedirs(output_dir, exist_ok=True)

            base_name = os.path.basename(self.pdf_path).rsplit('.', 1)[0]
            file_names = [
                f"{base_name}_region{i + 1}.txt" for i in range(region_count)
            ]

            # ExitStack closes every file that was successfully opened, even
            # if a later open() or any processing step raises.
            with ExitStack() as stack:
                output_files = [
                    stack.enter_context(
                        open(os.path.join(output_dir, name), 'a',
                             encoding='utf-8')
                    )
                    for name in file_names
                ]

                # Separator with a timestamp, since files are appended to.
                timestamp = datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S"
                )
                for f in output_files:
                    f.write(f"\n\n=== 新的提取任务 {timestamp} ===\n\n")

                for page_num in range(self.total_pages):
                    # Switch to the page so its regions are detected.
                    self.current_page.set(str(page_num + 1))
                    self._load_current_page()

                    if (not self.current_regions
                            or len(self.current_regions) < region_count):
                        messagebox.showwarning(
                            "警告",
                            f"第 {page_num+1} 页未检测到足够区域,跳过该页",
                        )
                        continue

                    page = self.doc[page_num]

                    # Process regions from top to bottom.
                    sorted_regions = sorted(
                        self.current_regions, key=lambda box: box[1]
                    )
                    for region_idx, (x, y, w, h) in enumerate(
                            sorted_regions[:region_count]):
                        # Image pixels back to PDF coordinates (the page was
                        # rendered at RENDER_SCALE).
                        pdf_x = x / scale
                        pdf_y = y / scale
                        pdf_w = w / scale
                        pdf_h = h / scale

                        rect = fitz.Rect(
                            pdf_x, pdf_y, pdf_x + pdf_w, pdf_y + pdf_h
                        )
                        text = page.get_text(clip=rect, sort=True)

                        if text.strip():
                            # Collapse all whitespace into single spaces so
                            # the region's text ends up on one line.
                            single_line_text = ' '.join(text.split())
                            output_files[region_idx].write(
                                f"=== 第 {page_num + 1} 页 ===\n"
                                f"区域坐标: ({x}, {y}) 尺寸: {w}x{h}\n"
                                f"{single_line_text}\n"
                                "-------------------\n\n"
                            )

            messagebox.showinfo(
                "成功",
                "文字已分别保存到output文件夹中的:\n" + "\n".join(file_names),
            )
        except Exception as e:
            # UI boundary: report any failure to the user instead of crashing.
            messagebox.showerror("错误", f"提取文字时发生错误:{str(e)}")


def main():
    """Create the main window and start the Tk event loop."""
    root = tk.Tk()
    app = PDFViewer(root)  # keep a reference for the lifetime of the loop
    root.mainloop()


if __name__ == "__main__":
    main()