支持批量转换

This commit is contained in:
Snowz 2025-04-26 14:32:43 +08:00
commit 0a92efb96a
13 changed files with 722 additions and 0 deletions

74
README.md Normal file
View File

@ -0,0 +1,74 @@
# PDF工具箱
一个简单易用的PDF处理工具,支持多种格式转换和PDF文件处理功能。
## 功能特性
- PDF转Word:将PDF文件转换为Word文档
- PDF转图片:将PDF文件转换为图片文件
- PDF转Markdown:提取PDF文本内容并转换为Markdown格式
- PDF转Excel:将PDF中的表格数据提取为Excel文件
- PDF合并:将多个PDF文件合并为一个文件
- PDF拆分:将PDF文件拆分为多个单页文件
- PDF压缩:压缩PDF文件大小
## 环境要求
- Python 3.11 或更高版本
- 所需Python包已在 requirements.txt 中列出
- 对于PDF转Excel功能,需要安装Java环境:
  - 安装Java JDK(推荐Java 8或更高版本)
  - 配置JAVA_HOME环境变量
  - 将Java添加到系统PATH中
### Java环境配置步骤(仅PDF转Excel功能需要)
1. 下载并安装Java JDK
- 访问 [Oracle Java下载页面](https://www.oracle.com/java/technologies/downloads/) 或使用OpenJDK
- 选择适合您系统的Java版本进行下载和安装
2. 配置环境变量
- 右键"此电脑" -> "属性" -> "高级系统设置" -> "环境变量"
- 在系统变量中新建 JAVA_HOME,值为Java安装目录(例如 C:\Program Files\Java\jdk-x.x.x)
- 在系统变量Path中添加 %JAVA_HOME%\bin
3. 验证安装
- 打开命令提示符
- 输入 `java -version`
- 如果显示版本信息,说明配置成功
## 安装步骤
1. 克隆或下载本项目
2. 安装依赖包:
```bash
pip install -r requirements.txt
```
## 打包说明
如果您想将程序打包成可执行文件,可以使用PyInstaller进行打包:
1. 首先安装PyInstaller:
```bash
pip install pyinstaller
```
2. 进入项目目录,运行以下命令进行打包:
```bash
pyinstaller --onefile --windowed --add-data "src;src" --add-data "config.json;." --add-data "requirements.txt;." main.py
```
3. 打包完成后,可执行文件会生成在 dist 目录下。
使用上面的 `--onefile` 命令时,生成的是单个可执行文件(默认以入口脚本命名);只有在命令中加入 `--name "PDF工具箱"` 时,输出才会以"PDF工具箱"命名。
注意事项:
- 打包前请确保已安装所有依赖包
- 如果使用PDF转Excel功能,打包后的程序仍然需要Java环境
- 首次运行打包后的程序可能需要稍等片刻
## 使用说明
打包命令的常用参数说明(以下部分参数为可选项,未出现在上面的示例命令中):
- `--name "PDF工具箱"`: 设置生成的程序名称
- `--windowed`: 不显示控制台窗口
- `--icon=assets/icon.ico`: 设置程序图标(如果有的话)
- `--add-data "assets;assets"`: 包含资源文件(如果有的话)

21
project_structure.txt Normal file
View File

@ -0,0 +1,21 @@
pdf_toolbox/
├── src/
│ ├── __init__.py
│ ├── main.py # 主程序入口
│ ├── ui/
│ │ ├── __init__.py
│ │ ├── main_window.py # 主窗口UI
│ │ └── styles.py # UI样式
│ ├── core/
│ │ ├── __init__.py
│ │ ├── converter.py # 格式转换
│ │ ├── merger.py # PDF合并
│ │ ├── splitter.py # PDF拆分
│ │ ├── compressor.py # PDF压缩
│ │ ├── watermark.py # 水印处理
│ │ └── utils.py # 工具函数
│ └── resources/
│ ├── icons/ # 图标资源
│ └── styles/ # 样式资源
├── requirements.txt # 依赖包
└── README.md # 项目说明

8
requirements.txt Normal file
View File

@ -0,0 +1,8 @@
PyQt6>=6.6.1
pdf2docx==0.5.6
PyMuPDF>=1.23.8
tabula-py>=2.9.0
pandas>=2.2.0
openpyxl>=3.1.0
Markdown>=3.5.2
pyinstaller>=6.4.0
Pillow>=10.2.0

15
src/core/compressor.py Normal file
View File

@ -0,0 +1,15 @@
import fitz
class PDFCompressor:
    """Shrink PDF files by re-saving them through PyMuPDF."""

    @staticmethod
    def compress_pdf(input_path, output_path):
        """
        Re-save a PDF using PyMuPDF's ``deflate`` and ``garbage=4`` options.

        :param input_path: path of the PDF to read
        :param output_path: path the re-saved copy is written to
        """
        # The context manager closes the document even when save() raises.
        with fitz.open(input_path) as source_doc:
            source_doc.save(output_path, garbage=4, deflate=True)

81
src/core/converter.py Normal file
View File

@ -0,0 +1,81 @@
import pdf2docx
import fitz
import markdown
import pandas as pd
import os
class PDFConverter:
    """Static helpers that convert PDF files to Word, images, text or Excel."""

    # Conversion types accepted by batch_convert.
    SUPPORTED_TYPES = ('word', 'images', 'markdown', 'excel')

    @staticmethod
    def batch_convert(pdf_paths, output_dir, convert_type):
        """
        Convert several PDF files, continuing past individual failures.

        :param pdf_paths: list of PDF file paths
        :param output_dir: directory receiving the results (created if missing)
        :param convert_type: one of 'word', 'images', 'markdown', 'excel'
        :return: list of ``(pdf_path, error_message)`` tuples, one per file
                 that could not be converted; empty when all succeeded
        :raises ValueError: if ``convert_type`` is not a supported type
        """
        # Reject typos up front instead of silently converting nothing.
        if convert_type not in PDFConverter.SUPPORTED_TYPES:
            raise ValueError(f"unsupported convert_type: {convert_type!r}")

        os.makedirs(output_dir, exist_ok=True)
        failures = []
        for pdf_path in pdf_paths:
            filename = os.path.basename(pdf_path)
            name_without_ext = os.path.splitext(filename)[0]
            try:
                if convert_type == 'word':
                    output_path = os.path.join(output_dir, f"{name_without_ext}.docx")
                    PDFConverter.pdf_to_word(pdf_path, output_path)
                elif convert_type == 'images':
                    # Each PDF gets its own sub-directory so page images of
                    # different documents cannot overwrite each other.
                    pdf_images_dir = os.path.join(output_dir, name_without_ext)
                    os.makedirs(pdf_images_dir, exist_ok=True)
                    PDFConverter.pdf_to_images(pdf_path, pdf_images_dir)
                elif convert_type == 'markdown':
                    output_path = os.path.join(output_dir, f"{name_without_ext}.md")
                    PDFConverter.pdf_to_markdown(pdf_path, output_path)
                else:  # 'excel'
                    output_path = os.path.join(output_dir, f"{name_without_ext}.xlsx")
                    PDFConverter.pdf_to_excel(pdf_path, output_path)
            except Exception as e:
                # Best-effort batch: one broken file must not abort the rest,
                # but the failure is recorded so the caller can report it.
                print(f"转换文件 {filename} 时出错: {str(e)}")
                failures.append((pdf_path, str(e)))
        return failures

    @staticmethod
    def pdf_to_word(pdf_path, output_path):
        """
        Convert one PDF to a .docx file with pdf2docx.

        :param pdf_path: PDF file path
        :param output_path: destination .docx path
        """
        converter = pdf2docx.Converter(pdf_path)
        try:
            converter.convert(output_path)
        finally:
            # Release the underlying PDF handle even if conversion fails.
            converter.close()

    @staticmethod
    def pdf_to_images(pdf_path, output_dir):
        """
        Render every page of a PDF to ``page_<n>.png`` (1-based) in output_dir.

        :param pdf_path: PDF file path
        :param output_dir: existing directory receiving the PNG files
        """
        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc, start=1):
                pix = page.get_pixmap()
                pix.save(os.path.join(output_dir, f'page_{page_number}.png'))

    @staticmethod
    def pdf_to_markdown(pdf_path, output_path):
        """
        Extract the text of every page and write it to a UTF-8 file.

        The extracted text is written as-is; no Markdown markup is generated.

        :param pdf_path: PDF file path
        :param output_path: destination text/Markdown path
        """
        with fitz.open(pdf_path) as doc:
            markdown_text = "".join(page.get_text() for page in doc)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(markdown_text)

    @staticmethod
    def pdf_to_excel(pdf_path, output_path):
        """
        Extract the tables of a PDF into an Excel workbook.

        Every table found becomes its own sheet ("Sheet1", "Sheet2", ...), so
        the first table stays on the sheet a single-table export would use.
        When no table is found an empty workbook is written.

        :param pdf_path: PDF file path
        :param output_path: destination .xlsx path
        """
        # Imported lazily: tabula-py needs a Java runtime, which only this
        # conversion requires.
        import tabula

        tables = tabula.read_pdf(pdf_path, pages='all')
        if not tables:
            pd.DataFrame().to_excel(output_path, index=False)
            return
        with pd.ExcelWriter(output_path) as writer:
            for sheet_number, table in enumerate(tables, start=1):
                table.to_excel(writer, sheet_name=f"Sheet{sheet_number}", index=False)

20
src/core/merger.py Normal file
View File

@ -0,0 +1,20 @@
import fitz
class PDFMerger:
    """Concatenate PDF files with PyMuPDF."""

    @staticmethod
    def merge_pdfs(pdf_paths, output_path):
        """
        Append the given PDFs, in list order, into one new document.

        :param pdf_paths: list of PDF file paths to merge
        :param output_path: path the merged PDF is saved to
        """
        # Both context managers close their document on any exit path.
        with fitz.open() as combined:
            for source_path in pdf_paths:
                with fitz.open(source_path) as source_doc:
                    combined.insert_pdf(source_doc)
            combined.save(output_path)

22
src/core/splitter.py Normal file
View File

@ -0,0 +1,22 @@
import fitz
import os
class PDFSplitter:
    """Split a PDF into single-page PDF files with PyMuPDF."""

    @staticmethod
    def split_pdf(pdf_path, output_dir):
        """
        Write every page of a PDF to its own ``page_<n>.pdf`` (1-based) file.

        :param pdf_path: PDF file path
        :param output_dir: directory receiving the page files (created if missing)
        """
        os.makedirs(output_dir, exist_ok=True)
        with fitz.open(pdf_path) as doc:
            for page_num in range(len(doc)):
                output_path = os.path.join(output_dir, f'page_{page_num + 1}.pdf')
                # The context manager closes the one-page document even when
                # insert_pdf() or save() raises.
                with fitz.open() as output_pdf:
                    output_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)
                    output_pdf.save(output_path)

11
src/core/utils.py Normal file
View File

@ -0,0 +1,11 @@
import os
def ensure_dir(directory):
    """
    Make sure ``directory`` exists, creating missing parent directories too.

    :param directory: path of the directory to create
    :raises FileExistsError: if the path exists but is not a directory
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and surfaces a clash with an existing file.
    os.makedirs(directory, exist_ok=True)
def get_output_path(input_path, new_ext):
    """
    Build an output path by swapping the extension of ``input_path``.

    :param input_path: source file path
    :param new_ext: new extension, given without a leading dot
    :return: ``input_path`` with its last extension replaced by ``new_ext``
    """
    stem, _old_ext = os.path.splitext(input_path)
    return "{}.{}".format(stem, new_ext)

14
src/main.py Normal file
View File

@ -0,0 +1,14 @@
import sys
from PyQt6.QtWidgets import QApplication
from ui.main_window import MainWindow
from ui.styles import MAIN_STYLE
def main():
    """Start the Qt application, show the main window and run the event loop."""
    application = QApplication(sys.argv)
    application.setStyleSheet(MAIN_STYLE)

    # Keep a reference to the window for the lifetime of the event loop.
    main_window = MainWindow()
    main_window.show()

    exit_code = application.exec()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,58 @@
from PIL import Image, ImageDraw, ImageFont
import os
def create_app_icon():
    """
    Draw the application icon and save it as a multi-size ``app.ico``.

    The icon is a blue disc with the white text "PDF", drawn on a transparent
    512x512 canvas. It is written to an ``icons`` directory next to this
    script, which is created if missing.
    """
    # Draw on a 512x512 canvas; the ICO sizes are scaled down from it.
    size = 512
    image = Image.new('RGBA', (size, size), (0, 0, 0, 0))
    draw = ImageDraw.Draw(image)

    # Circular background.
    background_color = (52, 152, 219)
    draw.ellipse([10, 10, size - 10, size - 10], fill=background_color)

    # "PDF" caption.
    text_color = (255, 255, 255)
    font_size = size // 4
    try:
        # Prefer Arial; fall back to Pillow's built-in font when the font
        # file cannot be read on this system.
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        font = ImageFont.load_default()

    text = "PDF"
    # Centre the text. The bounding box of text drawn at (0, 0) does not
    # start at (0, 0), so its left/top offset is subtracted as well.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (size - (right - left)) // 2 - left
    y = (size - (bottom - top)) // 2 - top
    draw.text((x, y), text, font=font, fill=text_color)

    # Make sure the icons directory next to this script exists.
    icons_dir = os.path.dirname(os.path.abspath(__file__))
    icons_path = os.path.join(icons_dir, 'icons')
    os.makedirs(icons_path, exist_ok=True)

    # Save from the full-size image: Pillow's ICO writer skips every requested
    # size that is larger than the image being saved, so saving from a 16x16
    # thumbnail would produce an icon containing only the 16x16 entry.
    icon_sizes = [(16, 16), (32, 32), (48, 48), (64, 64), (128, 128), (256, 256)]
    icon_path = os.path.join(icons_path, 'app.ico')
    image.save(icon_path, format='ICO', sizes=icon_sizes)
    print(f"图标已创建: {icon_path}")


if __name__ == "__main__":
    create_app_icon()

BIN
src/resources/icons/app.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 741 B

322
src/ui/main_window.py Normal file
View File

@ -0,0 +1,322 @@
from PyQt6.QtWidgets import (QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QFileDialog, QTabWidget,
QLineEdit, QMessageBox, QProgressBar, QListWidget)
from PyQt6.QtCore import Qt
from PyQt6.QtGui import QIcon
from core.converter import PDFConverter
from core.merger import PDFMerger
from core.splitter import PDFSplitter
from core.compressor import PDFCompressor
import os
class MainWindow(QMainWindow):
    """
    Main window of the PDF toolbox.

    Hosts one tab per feature: format conversion, merge, split and compress.
    Each tab keeps its own selection state so that choosing files in one tab
    never changes what another tab operates on.
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("PDF工具箱")
        self.setMinimumSize(800, 600)
        # All per-tab state is created up front so handlers can test the
        # values directly instead of probing with hasattr().
        self.selected_files = []        # converter tab: PDFs to convert
        self.output_directory = None    # converter tab: chosen output directory
        self.merge_files = []           # merger tab: PDFs to merge, in order
        self.split_file = None          # splitter tab: PDF to split
        self.compress_file = None       # compressor tab: PDF to compress
        self.setup_ui()

    def setup_ui(self):
        """Create the central widget and the tab widget with all feature tabs."""
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout(central_widget)

        tab_widget = QTabWidget()
        tab_widget.addTab(self.create_converter_tab(), "格式转换")
        tab_widget.addTab(self.create_merger_tab(), "PDF合并")
        tab_widget.addTab(self.create_splitter_tab(), "PDF拆分")
        tab_widget.addTab(self.create_compressor_tab(), "PDF压缩")
        # The watermark tab was removed.
        layout.addWidget(tab_widget)

    # ----------------------------------------------------------- converter

    def create_converter_tab(self):
        """Build the batch-conversion tab and return its root widget."""
        widget = QWidget()
        layout = QVBoxLayout(widget)

        # File selection and the list of chosen files.
        select_file_btn = QPushButton("选择PDF文件可多选")
        select_file_btn.clicked.connect(self.select_pdf_files)
        self.files_list_widget = QListWidget()

        # Output directory selection.
        output_dir_layout = QHBoxLayout()
        self.output_dir_label = QLabel("输出目录:未选择")
        select_output_dir_btn = QPushButton("选择输出目录")
        select_output_dir_btn.clicked.connect(self.select_output_directory)
        output_dir_layout.addWidget(self.output_dir_label)
        output_dir_layout.addWidget(select_output_dir_btn)

        # Progress bar, shown only while a conversion is running.
        self.progress_bar = QProgressBar()
        self.progress_bar.setVisible(False)

        # One button per target format.
        convert_to_word = QPushButton("批量转换为Word")
        convert_to_word.clicked.connect(lambda: self.batch_convert_files("word"))
        convert_to_image = QPushButton("批量转换为图片")
        convert_to_image.clicked.connect(lambda: self.batch_convert_files("images"))
        convert_to_markdown = QPushButton("批量转换为Markdown")
        convert_to_markdown.clicked.connect(lambda: self.batch_convert_files("markdown"))
        convert_to_excel = QPushButton("批量转换为Excel")
        convert_to_excel.clicked.connect(lambda: self.batch_convert_files("excel"))

        layout.addWidget(select_file_btn)
        layout.addWidget(self.files_list_widget)
        layout.addLayout(output_dir_layout)
        layout.addWidget(self.progress_bar)
        layout.addWidget(convert_to_word)
        layout.addWidget(convert_to_image)
        layout.addWidget(convert_to_markdown)
        layout.addWidget(convert_to_excel)
        return widget

    def select_pdf_files(self):
        """Let the user pick the PDFs to convert and list their file names."""
        files, _ = QFileDialog.getOpenFileNames(
            self,
            "选择PDF文件",
            "",
            "PDF文件 (*.pdf)"
        )
        if files:
            self.selected_files = files
            self.files_list_widget.clear()
            for file in files:
                self.files_list_widget.addItem(os.path.basename(file))

    def select_output_directory(self):
        """Let the user pick the conversion output directory."""
        output_dir = QFileDialog.getExistingDirectory(
            self,
            "选择输出目录"
        )
        if output_dir:
            self.output_directory = output_dir
            self.output_dir_label.setText(f"输出目录:{output_dir}")

    def batch_convert_files(self, convert_type):
        """
        Convert all selected PDFs to ``convert_type``.

        :param convert_type: 'word', 'images', 'markdown' or 'excel'
        """
        if not self.selected_files:
            QMessageBox.warning(self, "警告", "请先选择PDF文件")
            return
        if not self.output_directory:
            QMessageBox.warning(self, "警告", "请选择输出目录!")
            return
        try:
            self.progress_bar.setVisible(True)
            self.progress_bar.setValue(0)
            PDFConverter.batch_convert(self.selected_files, self.output_directory, convert_type)
            self.progress_bar.setValue(100)
            QMessageBox.information(self, "成功", "批量转换完成!")
        except Exception as e:
            QMessageBox.critical(self, "错误", f"转换失败:{e}")
        finally:
            self.progress_bar.setVisible(False)

    # -------------------------------------------------------------- merger

    def create_merger_tab(self):
        """Build the merge tab and return its root widget."""
        widget = QWidget()
        layout = QVBoxLayout(widget)

        select_files = QPushButton("选择PDF文件")
        select_files.clicked.connect(self.select_multiple_pdfs)
        self.files_label = QLabel("未选择文件")

        self.merger_progress = QProgressBar()
        self.merger_progress.setVisible(False)

        merge_button = QPushButton("合并PDF")
        merge_button.clicked.connect(self.merge_pdfs)

        layout.addWidget(select_files)
        layout.addWidget(self.files_label)
        layout.addWidget(self.merger_progress)
        layout.addWidget(merge_button)
        return widget

    def select_multiple_pdfs(self):
        """Let the user pick the PDFs to merge."""
        files, _ = QFileDialog.getOpenFileNames(
            self,
            "选择多个PDF文件",
            "",
            "PDF文件 (*.pdf)"
        )
        if files:
            # Stored separately from the converter tab's selection so the two
            # tabs cannot overwrite each other's input.
            self.merge_files = files
            self.files_label.setText(f"已选择 {len(files)} 个文件")

    def merge_pdfs(self):
        """Merge the PDFs chosen in this tab into a user-chosen output file."""
        if len(self.merge_files) < 2:
            QMessageBox.warning(self, "警告", "请至少选择两个PDF文件")
            return
        try:
            output_file, _ = QFileDialog.getSaveFileName(
                self,
                "保存合并后的PDF",
                "",
                "PDF文件 (*.pdf)"
            )
            if output_file:
                self.merger_progress.setVisible(True)
                self.merger_progress.setValue(0)
                PDFMerger.merge_pdfs(self.merge_files, output_file)
                self.merger_progress.setValue(100)
                QMessageBox.information(self, "成功", "PDF合并完成")
        except Exception as e:
            QMessageBox.critical(self, "错误", f"合并失败:{e}")
        finally:
            self.merger_progress.setVisible(False)

    # ------------------------------------------------------------ splitter

    def create_splitter_tab(self):
        """Build the split tab and return its root widget."""
        widget = QWidget()
        layout = QVBoxLayout(widget)

        select_file = QPushButton("选择PDF文件")
        select_file.clicked.connect(self.select_pdf_for_split)
        self.split_file_label = QLabel("未选择文件")

        self.split_progress = QProgressBar()
        self.split_progress.setVisible(False)

        split_button = QPushButton("拆分PDF")
        split_button.clicked.connect(self.split_pdf)

        layout.addWidget(select_file)
        layout.addWidget(self.split_file_label)
        layout.addWidget(self.split_progress)
        layout.addWidget(split_button)
        return widget

    def select_pdf_for_split(self):
        """Let the user pick the PDF to split."""
        file_name, _ = QFileDialog.getOpenFileName(
            self,
            "选择要拆分的PDF文件",
            "",
            "PDF文件 (*.pdf)"
        )
        if file_name:
            self.split_file = file_name
            self.split_file_label.setText(f"已选择: {os.path.basename(file_name)}")

    def split_pdf(self):
        """Split the chosen PDF into single pages in a user-chosen directory."""
        if not self.split_file:
            QMessageBox.warning(self, "警告", "请先选择PDF文件")
            return
        try:
            output_dir = QFileDialog.getExistingDirectory(
                self,
                "选择保存目录"
            )
            if output_dir:
                self.split_progress.setVisible(True)
                self.split_progress.setValue(0)
                PDFSplitter.split_pdf(self.split_file, output_dir)
                self.split_progress.setValue(100)
                QMessageBox.information(self, "成功", "PDF拆分完成")
        except Exception as e:
            QMessageBox.critical(self, "错误", f"拆分失败:{e}")
        finally:
            self.split_progress.setVisible(False)

    # ---------------------------------------------------------- compressor

    def create_compressor_tab(self):
        """Build the compress tab and return its root widget."""
        widget = QWidget()
        layout = QVBoxLayout(widget)

        select_file = QPushButton("选择PDF文件")
        select_file.clicked.connect(self.select_pdf_for_compress)
        self.compress_file_label = QLabel("未选择文件")

        self.compress_progress = QProgressBar()
        self.compress_progress.setVisible(False)

        compress_button = QPushButton("压缩PDF")
        compress_button.clicked.connect(self.compress_pdf)

        layout.addWidget(select_file)
        layout.addWidget(self.compress_file_label)
        layout.addWidget(self.compress_progress)
        layout.addWidget(compress_button)
        return widget

    def select_pdf_for_compress(self):
        """Let the user pick the PDF to compress."""
        file_name, _ = QFileDialog.getOpenFileName(
            self,
            "选择要压缩的PDF文件",
            "",
            "PDF文件 (*.pdf)"
        )
        if file_name:
            self.compress_file = file_name
            self.compress_file_label.setText(f"已选择: {os.path.basename(file_name)}")

    def compress_pdf(self):
        """Compress the chosen PDF into a user-chosen output file."""
        if not self.compress_file:
            QMessageBox.warning(self, "警告", "请先选择PDF文件")
            return
        try:
            output_file, _ = QFileDialog.getSaveFileName(
                self,
                "保存压缩后的PDF",
                "",
                "PDF文件 (*.pdf)"
            )
            if output_file:
                self.compress_progress.setVisible(True)
                self.compress_progress.setValue(0)
                PDFCompressor.compress_pdf(self.compress_file, output_file)
                self.compress_progress.setValue(100)
                QMessageBox.information(self, "成功", "PDF压缩完成")
        except Exception as e:
            QMessageBox.critical(self, "错误", f"压缩失败:{e}")
        finally:
            self.compress_progress.setVisible(False)

    # NOTE: the watermark-related methods were removed:
    # create_watermark_tab, select_watermark_color,
    # select_pdf_for_watermark, add_watermark.

76
src/ui/styles.py Normal file
View File

@ -0,0 +1,76 @@
# Application-wide Qt style sheet, applied through QApplication.setStyleSheet().
# Only properties understood by Qt Style Sheets are used: the CSS `transition`
# property is not one of them, so it is deliberately not set on the hover rule.
MAIN_STYLE = """
QMainWindow {
background-color: #f5f6fa;
}
QPushButton {
background-color: #3498db;
color: white;
border: none;
padding: 12px 24px;
border-radius: 6px;
font-size: 14px;
font-weight: bold;
margin: 8px;
min-width: 200px;
}
QPushButton:hover {
background-color: #2980b9;
}
QPushButton:pressed {
background-color: #2471a3;
}
QLineEdit {
padding: 10px;
border: 2px solid #bdc3c7;
border-radius: 6px;
background-color: white;
font-size: 14px;
margin: 8px;
}
QLineEdit:focus {
border-color: #3498db;
}
QTabWidget::pane {
border: none;
background-color: white;
border-radius: 8px;
margin-top: -1px;
}
QTabBar::tab {
background-color: #ecf0f1;
color: #34495e;
padding: 12px 24px;
margin-right: 2px;
border-top-left-radius: 6px;
border-top-right-radius: 6px;
font-weight: bold;
}
QTabBar::tab:selected {
background-color: white;
color: #2980b9;
border-bottom: 3px solid #3498db;
}
QTabBar::tab:hover:!selected {
background-color: #bdc3c7;
}
QWidget {
font-family: "Microsoft YaHei", "Segoe UI", sans-serif;
}
QLabel {
color: #2c3e50;
font-size: 14px;
margin: 8px;
}
"""