feat: 初始化PDF工具箱项目,支持PDF转换、合并、拆分和压缩功能

此次提交初始化了PDF工具箱项目,包括以下核心功能:
1. PDF转Word、图片、Markdown和Excel
2. PDF文件合并
3. PDF文件拆分
4. PDF文件压缩
5. 基于PyQt6的图形用户界面
6. 项目结构搭建和依赖管理
This commit is contained in:
Snowz 2025-04-26 13:11:26 +08:00
commit 6f665d35b9
13 changed files with 647 additions and 0 deletions

45
README.md Normal file
View File

@ -0,0 +1,45 @@
# PDF工具箱
一个简单易用的PDF处理工具，支持多种格式转换和PDF文件处理功能。
## 功能特性
- PDF转Word：将PDF文件转换为Word文档
- PDF转图片：将PDF文件转换为图片文件
- PDF转Markdown：提取PDF文本内容并转换为Markdown格式
- PDF转Excel：将PDF中的表格数据提取为Excel文件
- PDF合并：将多个PDF文件合并为一个文件
- PDF拆分：将PDF文件拆分为多个单页文件
- PDF压缩：压缩PDF文件大小
## 环境要求
- Python 3.11 或更高版本
- 所需Python包已在 requirements.txt 中列出
- 对于PDF转Excel功能，需要安装Java环境：
- 安装Java JDK（推荐Java 8或更高版本）
- 配置JAVA_HOME环境变量
- 将Java添加到系统PATH中
### Java环境配置步骤（仅PDF转Excel功能需要）
1. 下载并安装Java JDK
- 访问 [Oracle Java下载页面](https://www.oracle.com/java/technologies/downloads/) 或使用OpenJDK
- 选择适合您系统的Java版本进行下载和安装
2. 配置环境变量
- 右键"此电脑" -> "属性" -> "高级系统设置" -> "环境变量"
- 在系统变量中新建 JAVA_HOME，值为Java安装目录（例如：C:\Program Files\Java\jdk-x.x.x）
- 在系统变量Path中添加 %JAVA_HOME%\bin
3. 验证安装
- 打开命令提示符
- 输入 `java -version`
- 如果显示版本信息,说明配置成功
## 安装步骤
1. 克隆或下载本项目
2. 安装依赖包:
```bash
pip install -r requirements.txt

21
project_structure.txt Normal file
View File

@ -0,0 +1,21 @@
pdf_toolbox/
├── src/
│ ├── __init__.py
│ ├── main.py # 主程序入口
│ ├── ui/
│ │ ├── __init__.py
│ │ ├── main_window.py # 主窗口UI
│ │ └── styles.py # UI样式
│ ├── core/
│ │ ├── __init__.py
│ │ ├── converter.py # 格式转换
│ │ ├── merger.py # PDF合并
│ │ ├── splitter.py # PDF拆分
│ │ ├── compressor.py # PDF压缩
│ │ ├── watermark.py # 水印处理
│ │ └── utils.py # 工具函数
│ └── resources/
│ ├── icons/ # 图标资源
│ └── styles/ # 样式资源
├── requirements.txt # 依赖包
└── README.md # 项目说明

8
requirements.txt Normal file
View File

@ -0,0 +1,8 @@
PyQt6>=6.6.1
pdf2docx==0.5.6
PyMuPDF>=1.23.8
tabula-py>=2.9.0
pandas>=2.2.0
Markdown>=3.5.2
pyinstaller>=6.4.0
Pillow>=10.2.0

15
src/core/compressor.py Normal file
View File

@ -0,0 +1,15 @@
import fitz
class PDFCompressor:
    """Re-saves PDF files through PyMuPDF to reduce their size."""

    @staticmethod
    def compress_pdf(input_path, output_path):
        """
        Write a compressed copy of a PDF.

        The document is opened with PyMuPDF and saved with ``deflate=True``
        and ``garbage=4``; it is closed again whether or not saving succeeds.

        :param input_path: path of the PDF to read
        :param output_path: path the re-saved PDF is written to
        """
        # The context manager closes the document on success and on failure.
        with fitz.open(input_path) as source:
            source.save(output_path, garbage=4, deflate=True)

48
src/core/converter.py Normal file
View File

@ -0,0 +1,48 @@
import pdf2docx
import fitz
import markdown
import pandas as pd
import os
class PDFConverter:
    """Converts a PDF file into Word, PNG images, a text/Markdown file or Excel."""

    @staticmethod
    def pdf_to_word(pdf_path, output_path):
        """
        Convert a PDF into a Word document using pdf2docx.

        :param pdf_path: path of the PDF to read
        :param output_path: path of the .docx file to write
        """
        converter = pdf2docx.Converter(pdf_path)
        try:
            converter.convert(output_path)
        finally:
            # Release the source document even when the conversion fails.
            converter.close()

    @staticmethod
    def pdf_to_images(pdf_path, output_dir):
        """
        Render every page of a PDF to ``page_<n>.png`` (1-based) in a directory.

        :param pdf_path: path of the PDF to read
        :param output_dir: directory that receives the PNG files; it is
            created when it does not exist yet
        """
        os.makedirs(output_dir, exist_ok=True)
        # The context manager closes the document, which the loop alone never did.
        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc, start=1):
                pix = page.get_pixmap()
                pix.save(os.path.join(output_dir, f'page_{page_number}.png'))

    @staticmethod
    def pdf_to_markdown(pdf_path, output_path):
        """
        Extract the text of every page and write it to a UTF-8 file.

        The text returned by PyMuPDF's ``get_text()`` is written as-is, page
        after page; no Markdown markup is generated.

        :param pdf_path: path of the PDF to read
        :param output_path: path of the text/Markdown file to write
        """
        with fitz.open(pdf_path) as doc:
            markdown_text = "".join(page.get_text() for page in doc)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(markdown_text)

    @staticmethod
    def pdf_to_excel(pdf_path, output_path):
        """
        Extract the tables of a PDF into an Excel workbook.

        Every table found by tabula-py is written to its own worksheet
        (``Sheet1``, ``Sheet2``, ...), so no table is dropped. When no table
        is found an empty workbook is written instead.

        :param pdf_path: path of the PDF to read
        :param output_path: path of the Excel file to write
        """
        # Imported lazily: tabula-py needs a Java runtime, which only this
        # conversion requires (see README).
        import tabula
        tables = tabula.read_pdf(pdf_path, pages='all')
        if not tables:
            # No table found: still produce an (empty) Excel file.
            pd.DataFrame().to_excel(output_path, index=False)
            return
        # NOTE(review): writing .xlsx needs an Excel engine such as openpyxl,
        # which requirements.txt does not list explicitly.
        with pd.ExcelWriter(output_path) as writer:
            for sheet_number, table in enumerate(tables, start=1):
                table.to_excel(writer, sheet_name=f"Sheet{sheet_number}", index=False)

20
src/core/merger.py Normal file
View File

@ -0,0 +1,20 @@
import fitz
class PDFMerger:
    """Concatenates several PDF files into a single document."""

    @staticmethod
    def merge_pdfs(pdf_paths, output_path):
        """
        Append the pages of each input PDF, in list order, and save the result.

        :param pdf_paths: list of paths of the PDFs to merge
        :param output_path: path of the merged PDF to write
        """
        # fitz.open() without arguments creates a new, empty document.
        with fitz.open() as combined:
            for source_path in pdf_paths:
                source = fitz.open(source_path)
                try:
                    combined.insert_pdf(source)
                finally:
                    source.close()
            combined.save(output_path)

22
src/core/splitter.py Normal file
View File

@ -0,0 +1,22 @@
import fitz
import os
class PDFSplitter:
    """Splits a PDF into one single-page PDF per page."""

    @staticmethod
    def split_pdf(pdf_path, output_dir):
        """
        Write every page of a PDF to ``page_<n>.pdf`` (1-based) in a directory.

        :param pdf_path: path of the PDF to split
        :param output_dir: directory that receives the single-page files
        """
        with fitz.open(pdf_path) as doc:
            for page_num in range(len(doc)):
                # A fresh empty document per page; the context manager closes
                # it even when insert_pdf() or save() raises.
                with fitz.open() as single_page:
                    single_page.insert_pdf(doc, from_page=page_num, to_page=page_num)
                    single_page.save(
                        os.path.join(output_dir, f'page_{page_num + 1}.pdf')
                    )

11
src/core/utils.py Normal file
View File

@ -0,0 +1,11 @@
import os
def ensure_dir(directory):
    """
    Make sure a directory exists, creating it (and its parents) if needed.

    :param directory: path of the directory
    :raises FileExistsError: if the path exists but is not a directory
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and is a no-op for an existing directory.
    os.makedirs(directory, exist_ok=True)
def get_output_path(input_path, new_ext):
    """
    Build an output path by swapping the extension of the input path.

    :param input_path: path of the input file
    :param new_ext: new extension, with or without a leading dot
        (``"docx"`` and ``".docx"`` give the same result)
    :return: the input path with its extension replaced by ``new_ext``
    """
    base = os.path.splitext(input_path)[0]
    # Strip leading dots so ".docx" does not produce "name..docx".
    return f"{base}.{new_ext.lstrip('.')}"

14
src/main.py Normal file
View File

@ -0,0 +1,14 @@
import sys
from PyQt6.QtWidgets import QApplication
from ui.main_window import MainWindow
from ui.styles import MAIN_STYLE
def main():
    """Create the Qt application, show the main window and run the event loop."""
    application = QApplication(sys.argv)
    application.setStyleSheet(MAIN_STYLE)

    main_window = MainWindow()
    main_window.show()

    # exec() blocks until the application quits; its result becomes the
    # process exit status.
    exit_code = application.exec()
    sys.exit(exit_code)


# Launch the GUI only when this file is run as a script.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,58 @@
from PIL import Image, ImageDraw, ImageFont
import os
def create_app_icon():
    """
    Draw the application icon and save it as a multi-resolution ICO file.

    The icon is a blue disc with the white text "PDF". It is written to
    ``icons/app.ico`` inside the directory that contains this script, and the
    resulting path is printed.
    """
    # Draw on a 512x512 transparent canvas; the ICO sizes are scaled down from it.
    size = 512
    image = Image.new('RGBA', (size, size), (0, 0, 0, 0))
    draw = ImageDraw.Draw(image)

    # Circular background.
    background_color = (52, 152, 219)
    draw.ellipse([10, 10, size-10, size-10], fill=background_color)

    # "PDF" caption.
    text_color = (255, 255, 255)
    font_size = size // 4
    try:
        # Prefer Arial; fall back to Pillow's built-in font when the font
        # file cannot be opened on this system.
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        font = ImageFont.load_default()

    text = "PDF"
    # Measure the text so it can be centred. The bounding box measured at
    # (0, 0) may start at a non-zero offset, so that offset is subtracted to
    # centre the drawn glyphs rather than the text origin.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (size - (right - left)) // 2 - left
    y = (size - (bottom - top)) // 2 - top
    draw.text((x, y), text, font=font, fill=text_color)

    # Make sure the icons directory next to this script exists.
    icons_dir = os.path.dirname(os.path.abspath(__file__))
    icons_path = os.path.join(icons_dir, 'icons')
    os.makedirs(icons_path, exist_ok=True)

    # Save from the full-size canvas: Pillow's ICO writer ignores every
    # requested size that is larger than the image being saved, so saving
    # from the 16x16 frame produced an icon with only the 16px entry.
    icon_sizes = [(16, 16), (32, 32), (48, 48), (64, 64), (128, 128), (256, 256)]
    icon_path = os.path.join(icons_path, 'app.ico')
    image.save(icon_path, format='ICO', sizes=icon_sizes)
    print(f"图标已创建: {icon_path}")


# Generate the icon only when this file is run as a script.
if __name__ == "__main__":
    create_app_icon()

BIN
src/resources/icons/app.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 741 B

309
src/ui/main_window.py Normal file
View File

@ -0,0 +1,309 @@
from PyQt6.QtWidgets import (QMainWindow, QWidget, QVBoxLayout,
QPushButton, QLabel, QFileDialog, QTabWidget,
QLineEdit, QMessageBox, QProgressBar)
from PyQt6.QtCore import Qt
from PyQt6.QtGui import QIcon
from core.converter import PDFConverter
from core.merger import PDFMerger
from core.splitter import PDFSplitter
from core.compressor import PDFCompressor
import os
class MainWindow(QMainWindow):
    """
    Main window of the PDF toolbox.

    Hosts one tab per operation (format conversion, merge, split, compress).
    Each tab keeps its own file selection, status label and progress bar.
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("PDF工具箱")
        self.setMinimumSize(800, 600)
        # One selection per tab. Sharing a single list between the converter
        # and the merger let a selection made on one tab silently change the
        # input of the other while its label still showed the old state.
        self.selected_files = []    # converter tab (holds at most one path)
        self.merge_files = []       # merger tab
        self.split_file = None      # splitter tab
        self.compress_file = None   # compressor tab
        self.setup_ui()

    def setup_ui(self):
        """Build the central widget containing one tab per PDF operation."""
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout(central_widget)

        tab_widget = QTabWidget()
        tab_widget.addTab(self.create_converter_tab(), "格式转换")
        tab_widget.addTab(self.create_merger_tab(), "PDF合并")
        tab_widget.addTab(self.create_splitter_tab(), "PDF拆分")
        tab_widget.addTab(self.create_compressor_tab(), "PDF压缩")
        # NOTE: no watermark tab is registered.
        layout.addWidget(tab_widget)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _build_tab(self, on_select, actions):
        """
        Create a tab page: select button, status label, hidden progress bar
        and one button per action, stacked vertically in that order.

        :param on_select: slot connected to the "select file" button
        :param actions: sequence of ``(caption, slot)`` pairs, added in order
        :return: tuple ``(page widget, status label, progress bar)``
        """
        widget = QWidget()
        layout = QVBoxLayout(widget)

        select_button = QPushButton("选择PDF文件")
        select_button.clicked.connect(on_select)
        status_label = QLabel("未选择文件")
        progress = QProgressBar()
        progress.setVisible(False)

        layout.addWidget(select_button)
        layout.addWidget(status_label)
        layout.addWidget(progress)
        for caption, slot in actions:
            button = QPushButton(caption)
            button.clicked.connect(slot)
            layout.addWidget(button)
        return widget, status_label, progress

    def _pick_pdf(self, title):
        """Show an open-file dialog for one PDF; return its path or ''."""
        file_name, _ = QFileDialog.getOpenFileName(
            self, title, "", "PDF文件 (*.pdf)"
        )
        return file_name

    def _pick_save_path(self, title):
        """Show a save-file dialog for a PDF; return the chosen path or ''."""
        output_file, _ = QFileDialog.getSaveFileName(
            self, title, "", "PDF文件 (*.pdf)"
        )
        return output_file

    def _run_with_progress(self, progress, task, success_text, failure_prefix):
        """
        Run ``task`` while showing ``progress`` and report the outcome.

        :param progress: progress bar shown during the task and hidden afterwards
        :param task: zero-argument callable that does the work
        :param success_text: message of the information box shown on success
        :param failure_prefix: text placed before the exception message in the
            error box shown when ``task`` raises
        """
        try:
            progress.setVisible(True)
            progress.setValue(0)
            task()
            progress.setValue(100)
            QMessageBox.information(self, "成功", success_text)
        except Exception as e:
            # GUI boundary: report any failure to the user instead of
            # letting it escape into the Qt event loop.
            QMessageBox.critical(self, "错误", f"{failure_prefix}:{str(e)}")
        finally:
            progress.setVisible(False)

    # ------------------------------------------------------------------
    # Converter tab
    # ------------------------------------------------------------------
    def create_converter_tab(self):
        """Create the format-conversion tab (Word, image, Markdown, Excel)."""
        widget, self.file_label, self.progress_bar = self._build_tab(
            self.select_pdf_file,
            [
                ("转换为Word", lambda: self.convert_pdf("word")),
                ("转换为图片", lambda: self.convert_pdf("image")),
                ("转换为Markdown", lambda: self.convert_pdf("markdown")),
                ("转换为Excel", lambda: self.convert_pdf("excel")),
            ],
        )
        return widget

    def select_pdf_file(self):
        """Let the user choose the PDF used by the conversion buttons."""
        file_name = self._pick_pdf("选择PDF文件")
        if file_name:
            self.selected_files = [file_name]
            self.file_label.setText(f"已选择: {os.path.basename(file_name)}")

    def convert_pdf(self, target_format):
        """
        Convert the selected PDF; the output is written next to the input.

        :param target_format: one of ``"word"``, ``"image"``, ``"markdown"``
            or ``"excel"``; any other value is reported as a failure
        """
        if not self.selected_files:
            QMessageBox.warning(self, "警告", "请先选择PDF文件")
            return

        input_file = self.selected_files[0]
        stem = os.path.splitext(input_file)[0]

        def run_conversion():
            if target_format == "word":
                PDFConverter.pdf_to_word(input_file, stem + ".docx")
            elif target_format == "image":
                # Images go into a sibling "<name>_images" directory.
                output_dir = stem + "_images"
                os.makedirs(output_dir, exist_ok=True)
                PDFConverter.pdf_to_images(input_file, output_dir)
            elif target_format == "markdown":
                PDFConverter.pdf_to_markdown(input_file, stem + ".md")
            elif target_format == "excel":
                PDFConverter.pdf_to_excel(input_file, stem + ".xlsx")
            else:
                # An unknown format must not be reported as a success.
                raise ValueError(f"unsupported target format: {target_format!r}")

        self._run_with_progress(
            self.progress_bar, run_conversion, "转换完成!", "转换失败"
        )

    # ------------------------------------------------------------------
    # Merger tab
    # ------------------------------------------------------------------
    def create_merger_tab(self):
        """Create the merge tab."""
        widget, self.files_label, self.merger_progress = self._build_tab(
            self.select_multiple_pdfs,
            [("合并PDF", self.merge_pdfs)],
        )
        return widget

    def select_multiple_pdfs(self):
        """Let the user choose the PDFs to merge."""
        files, _ = QFileDialog.getOpenFileNames(
            self, "选择多个PDF文件", "", "PDF文件 (*.pdf)"
        )
        if files:
            self.merge_files = files
            self.files_label.setText(f"已选择 {len(files)} 个文件")

    def merge_pdfs(self):
        """Merge the selected PDFs into a file chosen by the user."""
        if len(self.merge_files) < 2:
            QMessageBox.warning(self, "警告", "请至少选择两个PDF文件")
            return
        output_file = self._pick_save_path("保存合并后的PDF")
        if not output_file:
            return  # dialog cancelled
        self._run_with_progress(
            self.merger_progress,
            lambda: PDFMerger.merge_pdfs(self.merge_files, output_file),
            "PDF合并完成",
            "合并失败",
        )

    # ------------------------------------------------------------------
    # Splitter tab
    # ------------------------------------------------------------------
    def create_splitter_tab(self):
        """Create the split tab."""
        widget, self.split_file_label, self.split_progress = self._build_tab(
            self.select_pdf_for_split,
            [("拆分PDF", self.split_pdf)],
        )
        return widget

    def select_pdf_for_split(self):
        """Let the user choose the PDF to split."""
        file_name = self._pick_pdf("选择要拆分的PDF文件")
        if file_name:
            self.split_file = file_name
            self.split_file_label.setText(f"已选择: {os.path.basename(file_name)}")

    def split_pdf(self):
        """Split the selected PDF into a directory chosen by the user."""
        if not self.split_file:
            QMessageBox.warning(self, "警告", "请先选择PDF文件")
            return
        output_dir = QFileDialog.getExistingDirectory(self, "选择保存目录")
        if not output_dir:
            return  # dialog cancelled
        self._run_with_progress(
            self.split_progress,
            lambda: PDFSplitter.split_pdf(self.split_file, output_dir),
            "PDF拆分完成",
            "拆分失败",
        )

    # ------------------------------------------------------------------
    # Compressor tab
    # ------------------------------------------------------------------
    def create_compressor_tab(self):
        """Create the compress tab."""
        widget, self.compress_file_label, self.compress_progress = self._build_tab(
            self.select_pdf_for_compress,
            [("压缩PDF", self.compress_pdf)],
        )
        return widget

    def select_pdf_for_compress(self):
        """Let the user choose the PDF to compress."""
        file_name = self._pick_pdf("选择要压缩的PDF文件")
        if file_name:
            self.compress_file = file_name
            self.compress_file_label.setText(f"已选择: {os.path.basename(file_name)}")

    def compress_pdf(self):
        """Compress the selected PDF into a file chosen by the user."""
        if not self.compress_file:
            QMessageBox.warning(self, "警告", "请先选择PDF文件")
            return
        output_file = self._pick_save_path("保存压缩后的PDF")
        if not output_file:
            return  # dialog cancelled
        self._run_with_progress(
            self.compress_progress,
            lambda: PDFCompressor.compress_pdf(self.compress_file, output_file),
            "PDF压缩完成",
            "压缩失败",
        )
# 删除以下水印相关的方法:
# - create_watermark_tab
# - select_watermark_color
# - select_pdf_for_watermark
# - add_watermark

76
src/ui/styles.py Normal file
View File

@ -0,0 +1,76 @@
# Application-wide Qt style sheet, applied via QApplication.setStyleSheet().
# Only properties understood by Qt Style Sheets are used: the CSS
# `transition` property is not one of them, so it is not set on the
# QPushButton:hover rule.
MAIN_STYLE = """
QMainWindow {
background-color: #f5f6fa;
}
QPushButton {
background-color: #3498db;
color: white;
border: none;
padding: 12px 24px;
border-radius: 6px;
font-size: 14px;
font-weight: bold;
margin: 8px;
min-width: 200px;
}
QPushButton:hover {
background-color: #2980b9;
}
QPushButton:pressed {
background-color: #2471a3;
}
QLineEdit {
padding: 10px;
border: 2px solid #bdc3c7;
border-radius: 6px;
background-color: white;
font-size: 14px;
margin: 8px;
}
QLineEdit:focus {
border-color: #3498db;
}
QTabWidget::pane {
border: none;
background-color: white;
border-radius: 8px;
margin-top: -1px;
}
QTabBar::tab {
background-color: #ecf0f1;
color: #34495e;
padding: 12px 24px;
margin-right: 2px;
border-top-left-radius: 6px;
border-top-right-radius: 6px;
font-weight: bold;
}
QTabBar::tab:selected {
background-color: white;
color: #2980b9;
border-bottom: 3px solid #3498db;
}
QTabBar::tab:hover:!selected {
background-color: #bdc3c7;
}
QWidget {
font-family: "Microsoft YaHei", "Segoe UI", sans-serif;
}
QLabel {
color: #2c3e50;
font-size: 14px;
margin: 8px;
}
"""