import os
import json
import fitz
import tkinter as tk
from tkinter import filedialog, messagebox, ttk, simpledialog
from tkinter.scrolledtext import ScrolledText
from PIL import Image, ImageTk
class PDFTextRemoverApp:
    """Tkinter GUI that redacts user-supplied text snippets from PDF files.

    Workflow driven by the buttons built in ``create_widgets``:
    import PDFs -> (optionally) count matches -> pick an output folder ->
    process (redact in memory, preview first page) -> confirm export.

    Named sets of search texts plus the output folder can be saved to and
    restored from ``CONFIG_FILE`` (JSON, relative to the working directory).
    """

    # JSON file holding the saved configurations (relative to the CWD).
    CONFIG_FILE = "pdf_configs.json"
    # Resolution used when rasterising the first page for the preview.
    PREVIEW_DPI = 150
    # Blank border, in pixels, kept around the preview image on each side.
    PREVIEW_PADDING = 10
    # Exact PDF points per millimetre: 72 points per inch / 25.4 mm per inch.
    POINTS_PER_MM = 72.0 / 25.4

    def __init__(self, root):
        """Set up window properties, state, widgets and event bindings.

        Args:
            root: The Tk root (or toplevel) window that hosts the application.
        """
        self.root = root
        self.root.title("PDF区域文本清除工具")
        self.root.geometry("1000x500")
        self.root.minsize(900, 400)

        # Application state.
        self.pdf_files = []
        self.output_folder = tk.StringVar()
        self.text_to_remove = []
        # Messages logged before the log widget exists (e.g. a config load
        # failure below) are parked here and flushed after create_widgets().
        self._pending_logs = []
        self.saved_texts = self.load_config()
        self.temp_docs = {}
        self.current_preview = None
        self.original_image = None
        self.process_count = {}
        self.preview_size = (0, 0)
        self.current_scale = 1.0

        # Build the UI, then replay anything logged during start-up.
        self.create_widgets()
        self._flush_pending_logs()
        self.update_config_list()

        # Re-fit the preview whenever the window layout changes.
        self.root.bind("<Configure>", self.on_window_resize)

    def create_widgets(self):
        """Build the two-pane layout: controls/log on the left, preview on the right."""
        main_paned = tk.PanedWindow(self.root, orient=tk.HORIZONTAL, sashrelief=tk.RAISED)
        main_paned.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Left pane: inputs, buttons, configuration management and log.
        left_frame = tk.Frame(main_paned)
        main_paned.add(left_frame, minsize=400)

        # Text entry rows: one search text plus a read-only match counter each.
        input_frame = tk.LabelFrame(left_frame, text="需要删除的文本内容(最多6组)", padx=5, pady=5)
        input_frame.pack(fill=tk.X, padx=5, pady=5)
        self.text_boxes = []
        self.search_results = []
        for i in range(6):
            row_frame = tk.Frame(input_frame)
            row_frame.pack(fill=tk.X, pady=2)
            tk.Label(row_frame, text=f"文本{i+1}:", width=6).pack(side=tk.LEFT)
            entry = tk.Entry(row_frame)
            entry.pack(side=tk.LEFT, expand=True, fill=tk.X, padx=5)
            self.text_boxes.append(entry)
            result_var = tk.StringVar()
            result_entry = tk.Entry(row_frame, width=6, state='readonly', textvariable=result_var)
            result_entry.pack(side=tk.RIGHT)
            self.search_results.append(result_var)

        # Main workflow buttons.
        btn_frame = tk.Frame(left_frame)
        btn_frame.pack(fill=tk.X, pady=10, padx=5)
        control_btns = [
            ("① 导入PDF", self.import_files),
            ("② 查找文本", self.search_text),
            ("③ 输出位置", self.select_output_folder),
            ("④ 开始处理", self.process_files),
        ]
        for text, cmd in control_btns:
            tk.Button(btn_frame, text=text, command=cmd, width=10).pack(side=tk.LEFT, padx=2)

        # Saved-configuration management.
        config_frame = tk.Frame(btn_frame)
        config_frame.pack(side=tk.RIGHT, padx=10)
        self.config_combo = ttk.Combobox(config_frame, width=18, state="readonly")
        self.config_combo.pack(side=tk.LEFT)
        tk.Button(config_frame, text="保存", command=self.save_config, width=4).pack(side=tk.LEFT, padx=2)
        tk.Button(config_frame, text="加载", command=self.load_config_ui, width=4).pack(side=tk.LEFT, padx=2)
        tk.Button(config_frame, text="删除", command=self.delete_config, width=4).pack(side=tk.LEFT, padx=2)

        # Processing log.
        log_frame = tk.LabelFrame(left_frame, text="处理日志")
        log_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.log_area = ScrolledText(log_frame, height=8, state='disabled')
        self.log_area.pack(fill=tk.BOTH, expand=True)

        # Right pane: preview canvas and export button.
        right_frame = tk.Frame(main_paned)
        main_paned.add(right_frame, minsize=260)
        preview_frame = tk.LabelFrame(right_frame, text="处理效果预览", padx=5, pady=5)
        preview_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.canvas = tk.Canvas(preview_frame, bg='#f0f0f0', bd=2, relief=tk.GROOVE)
        self.canvas.pack(fill=tk.BOTH, expand=True)

        export_btn = tk.Button(right_frame, text="确认导出",
                               command=self.confirm_export,
                               bg='#f0f0f0', fg='black',
                               font=('微软雅黑', 10), height=1)
        export_btn.pack(fill=tk.X, padx=5, pady=5)

    def mm_to_point(self, mm):
        """Convert millimetres to PDF points (1 in = 25.4 mm = 72 pt)."""
        return mm * self.POINTS_PER_MM

    def on_window_resize(self, event):
        """Re-fit the preview when the canvas size has actually changed.

        The handler is bound on the root window, so it also receives the
        ``<Configure>`` events of every descendant widget. Comparing against
        the size used for the last render avoids re-running the expensive
        image resize for events that did not change the canvas.
        """
        if self.original_image is None:
            return
        pad = 2 * self.PREVIEW_PADDING
        available = (self.canvas.winfo_width() - pad,
                     self.canvas.winfo_height() - pad)
        if available != self.preview_size:
            self.update_preview()

    def update_preview(self):
        """Scale ``original_image`` to fit the canvas and draw it centred."""
        if self.original_image is None:
            return
        pad = self.PREVIEW_PADDING
        canvas_width = self.canvas.winfo_width() - 2 * pad
        canvas_height = self.canvas.winfo_height() - 2 * pad
        if canvas_width <= 0 or canvas_height <= 0:
            return

        # Largest uniform scale at which the whole page still fits.
        img_width, img_height = self.original_image.size
        self.current_scale = min(canvas_width / img_width,
                                 canvas_height / img_height)

        # Clamp to 1px: a zero-sized target makes Image.resize raise.
        new_size = (max(1, int(img_width * self.current_scale)),
                    max(1, int(img_height * self.current_scale)))
        img = self.original_image.resize(new_size, Image.Resampling.LANCZOS)

        # Keep a reference on self; Tk does not hold one for PhotoImage.
        self.current_preview = ImageTk.PhotoImage(img)
        self.canvas.delete("all")
        x_offset = (canvas_width - new_size[0]) // 2 + pad
        y_offset = (canvas_height - new_size[1]) // 2 + pad
        self.canvas.create_image(x_offset, y_offset, anchor=tk.NW, image=self.current_preview)
        self.preview_size = (canvas_width, canvas_height)

    def log(self, message):
        """Append ``message`` to the log pane and scroll to the end.

        If the log widget has not been created yet (messages emitted while
        ``__init__`` is still running), the message is queued and written
        once ``_flush_pending_logs`` is called.
        """
        log_area = getattr(self, "log_area", None)
        if log_area is None:
            pending = getattr(self, "_pending_logs", None)
            if pending is None:
                pending = self._pending_logs = []
            pending.append(message)
            return
        log_area.config(state='normal')
        log_area.insert(tk.END, message + "\n")
        log_area.config(state='disabled')
        log_area.see(tk.END)
        # Redraw only. A full update() would also dispatch button clicks and
        # could re-enter process_files/confirm_export while they are running.
        self.root.update_idletasks()

    def _flush_pending_logs(self):
        """Write messages queued before the log widget existed."""
        pending, self._pending_logs = self._pending_logs, []
        for message in pending:
            self.log(message)

    def import_files(self):
        """Ask for PDF files and preview the first one.

        Cancelling the dialog keeps the previously imported selection.
        """
        selected = filedialog.askopenfilenames(
            title="选择PDF文件",
            filetypes=[("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        )
        if not selected:
            self.log("文件选择已取消")
            return
        self.pdf_files = list(selected)
        self.log(f"成功导入 {len(self.pdf_files)} 个PDF文件")
        self.show_initial_preview()

    def _render_first_page(self, doc):
        """Rasterise page 0 of ``doc`` into a PIL RGB image."""
        pix = doc[0].get_pixmap(dpi=self.PREVIEW_DPI)
        return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

    def show_initial_preview(self):
        """Preview the first page of the first imported (unprocessed) PDF."""
        if not self.pdf_files:
            return
        try:
            doc = fitz.open(self.pdf_files[0])
            try:
                self.original_image = self._render_first_page(doc)
            finally:
                # Close even when rendering fails (e.g. a PDF without pages).
                doc.close()
            self.update_preview()
        except Exception as e:
            self.log(f"预览失败:{str(e)}")

    def _close_temp_docs(self):
        """Close every document held in ``temp_docs`` and empty the mapping."""
        for doc in self.temp_docs.values():
            if not getattr(doc, "is_closed", False):
                doc.close()
        self.temp_docs = {}

    def _redact_document(self, doc, texts):
        """Redact every occurrence of each string in ``texts`` on all pages of ``doc``."""
        for page in doc:
            for text in texts:
                for inst in page.search_for(text):
                    # Grow the hit box by 1pt on each side so glyph edges
                    # are covered as well.
                    rect = fitz.Rect(inst)
                    rect.x0 -= 1
                    rect.x1 += 1
                    rect.y0 -= 1
                    rect.y1 += 1
                    page.add_redact_annot(rect, text="", fill=(1, 1, 1))
            page.apply_redactions()

    def process_files(self):
        """Redact the entered texts in all imported PDFs (in memory) and preview.

        Processed documents stay open in ``temp_docs`` until exported.
        Documents left over from a previous run are closed first. If a file
        fails, the files processed before it remain available for export.
        """
        if not self.pdf_files:
            messagebox.showwarning("警告", "请先导入PDF文件")
            return
        stripped = (entry.get().strip() for entry in self.text_boxes)
        texts = [text for text in stripped if text]
        if not texts:
            messagebox.showwarning("警告", "请输入要删除的文本")
            return

        self._close_temp_docs()
        try:
            for file_path in self.pdf_files:
                doc = fitz.open(file_path)
                try:
                    self._redact_document(doc, texts)
                except Exception:
                    # Not stored in temp_docs yet, so nobody else will close it.
                    doc.close()
                    raise
                self.temp_docs[file_path] = doc
                self.log(f"处理完成:{os.path.basename(file_path)}")
                self.show_processed_preview(file_path)
            messagebox.showinfo("完成", "文件处理完成,请预览后导出")
        except Exception as e:
            self.log(f"处理失败:{str(e)}")
            messagebox.showerror("错误", f"处理过程中发生错误:\n{str(e)}")

    def show_processed_preview(self, file_path):
        """Preview the first page of the processed document for ``file_path``."""
        if file_path in self.temp_docs:
            self.original_image = self._render_first_page(self.temp_docs[file_path])
            self.update_preview()

    def confirm_export(self):
        """Save every processed document into the output folder.

        Files are written as ``<name>_v<N><ext>`` where N counts successful
        exports of that source path during this session. The output folder
        is created if it does not exist. Each document is closed and removed
        from ``temp_docs`` as soon as it has been saved, so after a failure
        only the not-yet-exported documents remain and the export can be
        retried.
        """
        if not self.temp_docs:
            messagebox.showwarning("警告", "请先处理文件")
            return
        output_dir = self.output_folder.get()
        if not output_dir:
            messagebox.showwarning("警告", "请选择输出目录")
            return

        try:
            os.makedirs(output_dir, exist_ok=True)
            # Iterate over a snapshot: entries are deleted as they are exported.
            for file_path, doc in list(self.temp_docs.items()):
                name, ext = os.path.splitext(os.path.basename(file_path))
                version = self.process_count.get(file_path, 0) + 1
                output_path = os.path.join(output_dir, f"{name}_v{version}{ext}")
                doc.save(output_path)
                # Only count the version once the file is really on disk.
                self.process_count[file_path] = version
                self.log(f"成功导出:{output_path}")
                doc.close()
                del self.temp_docs[file_path]
        except Exception as e:
            self.log(f"导出失败:{str(e)}")
            messagebox.showerror("错误", f"导出过程中发生错误:\n{str(e)}")
            return

        # Drop the preview entirely so a later resize does not redraw it.
        self.canvas.delete("all")
        self.original_image = None
        self.current_preview = None
        self.preview_size = (0, 0)
        messagebox.showinfo("完成", "所有文件已成功导出")

    def load_config(self):
        """Read the saved configurations from ``CONFIG_FILE``.

        Returns:
            The mapping of configuration name to settings, or an empty dict
            when the file is missing, unreadable, or not a JSON object.
        """
        if not os.path.exists(self.CONFIG_FILE):
            return {}
        try:
            with open(self.CONFIG_FILE, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax errors and decoding errors.
            self.log(f"配置加载失败:{str(e)}")
            return {}
        if not isinstance(data, dict):
            # The rest of the class relies on dict methods (.keys(), item access).
            self.log("配置加载失败:配置文件格式无效")
            return {}
        return data

    def save_config(self):
        """Prompt for a name and store the current texts/output folder under it."""
        config_name = simpledialog.askstring("保存配置", "请输入配置名称:", parent=self.root)
        if not config_name:
            return
        config_name = config_name.strip()
        if not config_name:
            # A whitespace-only name would show up as a blank combobox entry.
            return
        self.saved_texts[config_name] = {
            "texts": [entry.get() for entry in self.text_boxes],
            "output_folder": self.output_folder.get()
        }
        self.update_config_list(config_name)
        self.save_to_file()
        self.log(f"配置 '{config_name}' 已保存")

    def load_config_ui(self):
        """Populate the entry boxes and output folder from the selected configuration."""
        config_name = self.config_combo.get()
        if not config_name or config_name not in self.saved_texts:
            return
        config = self.saved_texts[config_name]
        # Clear everything first so boxes beyond the saved list do not keep
        # text from the previous state.
        for box in self.text_boxes:
            box.delete(0, tk.END)
        for box, text in zip(self.text_boxes, config.get("texts", [])):
            box.insert(0, text)
        self.output_folder.set(config.get("output_folder", ""))
        self.log(f"配置 '{config_name}' 已加载")

    def delete_config(self):
        """Remove the selected configuration and reset the input fields."""
        config_name = self.config_combo.get()
        if not config_name or config_name not in self.saved_texts:
            return
        del self.saved_texts[config_name]
        self.update_config_list()
        self.save_to_file()
        for box in self.text_boxes:
            box.delete(0, tk.END)
        self.output_folder.set("")
        self.log(f"配置 '{config_name}' 已删除")

    def update_config_list(self, selected=None):
        """Refresh the configuration combobox from ``saved_texts``.

        Args:
            selected: Name to show as the current choice. When omitted or
                unknown, the first configuration is selected. With no
                configurations left the displayed text is cleared.
        """
        names = list(self.saved_texts)
        self.config_combo["values"] = names
        if not names:
            # A read-only combobox keeps its old text unless reset explicitly.
            self.config_combo.set("")
        elif selected in names:
            self.config_combo.current(names.index(selected))
        else:
            self.config_combo.current(0)

    def save_to_file(self):
        """Write all saved configurations to ``CONFIG_FILE`` as UTF-8 JSON."""
        try:
            with open(self.CONFIG_FILE, 'w', encoding='utf-8') as f:
                json.dump(self.saved_texts, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as e:
            self.log(f"配置保存失败:{str(e)}")

    def select_output_folder(self):
        """Ask for the export directory and remember it."""
        folder = filedialog.askdirectory(title="选择输出目录")
        if folder:
            self.output_folder.set(folder)
            self.log(f"输出目录设置为:{folder}")
        else:
            self.log("目录选择已取消")

    def search_text(self):
        """Count occurrences of each entered text across all imported PDFs.

        The per-row totals are written to the read-only result fields.
        Rows with an empty text report 0.
        """
        if not self.pdf_files:
            messagebox.showwarning("警告", "请先导入PDF文件")
            return
        search_texts = [entry.get().strip() for entry in self.text_boxes]
        results = [0] * len(search_texts)
        # Only non-empty rows need to be searched.
        active = [(i, text) for i, text in enumerate(search_texts) if text]
        try:
            for file_path in self.pdf_files:
                doc = fitz.open(file_path)
                try:
                    for page in doc:
                        for i, text in active:
                            results[i] += len(page.search_for(text))
                finally:
                    doc.close()
            for result_var, count in zip(self.search_results, results):
                result_var.set(str(count))
            self.log("文本查找完成,结果已更新")
        except Exception as e:
            self.log(f"查找失败:{str(e)}")
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the application
    # to it, and hand control to the Tk event loop until the window closes.
    main_window = tk.Tk()
    application = PDFTextRemoverApp(main_window)
    main_window.mainloop()