feat: 初始化PDF工具箱项目，支持PDF转换、合并、拆分和压缩功能

此次提交初始化了PDF工具箱项目，包括以下核心功能： 1. PDF转Word、图片、Markdown和Excel 2. PDF文件合并 3. PDF文件拆分 4. PDF文件压缩 5. 基于PyQt6的图形用户界面 6. 项目结构搭建和依赖管理
2025-04-26 13:11:26 +08:00
commit 6f665d35b9
13 changed files with 647 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,45 @@
+# PDF工具箱
+
+一个简单易用的PDF处理工具，支持多种格式转换和PDF文件处理功能。
+
+## 功能特性
+
+- PDF转Word：将PDF文件转换为Word文档
+- PDF转图片：将PDF文件转换为图片文件
+- PDF转Markdown：提取PDF文本内容并转换为Markdown格式
+- PDF转Excel：将PDF中的表格数据提取为Excel文件
+- PDF合并：将多个PDF文件合并为一个文件
+- PDF拆分：将PDF文件拆分为多个单页文件
+- PDF压缩：压缩PDF文件大小
+
+## 环境要求
+
+- Python 3.11 或更高版本
+- 所需Python包已在 requirements.txt 中列出
+- 对于PDF转Excel功能，需要安装Java环境：
+  - 安装Java JDK（推荐Java 8或更高版本）
+  - 配置JAVA_HOME环境变量
+  - 将Java添加到系统PATH中
+
+### Java环境配置步骤（仅PDF转Excel功能需要）
+
+1. 下载并安装Java JDK
+   - 访问 [Oracle Java下载页面](https://www.oracle.com/java/technologies/downloads/) 或使用OpenJDK
+   - 选择适合您系统的Java版本进行下载和安装
+
+2. 配置环境变量（以下步骤以Windows为例）
+   - 右键"此电脑" -> "属性" -> "高级系统设置" -> "环境变量"
+   - 在系统变量中新建 JAVA_HOME，值为Java安装目录（如：C:\Program Files\Java\jdk-x.x.x）
+   - 在系统变量Path中添加 %JAVA_HOME%\bin
+
+3. 验证安装
+   - 打开命令提示符
+   - 输入 `java -version`
+   - 如果显示版本信息，说明配置成功
+
+## 安装步骤
+
+1. 克隆或下载本项目
+2. 安装依赖包：
+
+   `pip install -r requirements.txt`
--- a/project_structure.txt
+++ b/project_structure.txt
@@ -0,0 +1,21 @@
+pdf_toolbox/
+├── src/
+│   ├── __init__.py
+│   ├── main.py              # 主程序入口
+│   ├── ui/
+│   │   ├── __init__.py
+│   │   ├── main_window.py   # 主窗口UI
+│   │   └── styles.py        # UI样式
+│   ├── core/
+│   │   ├── __init__.py
+│   │   ├── converter.py     # 格式转换
+│   │   ├── merger.py        # PDF合并
+│   │   ├── splitter.py      # PDF拆分
+│   │   ├── compressor.py    # PDF压缩
+│   │   ├── watermark.py     # 水印处理（本次提交未包含该文件）
+│   │   └── utils.py         # 工具函数
+│   └── resources/
+│       ├── icons/           # 图标资源
+│       └── styles/          # 样式资源
+├── requirements.txt         # 依赖包
+└── README.md               # 项目说明
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+PyQt6>=6.6.1
+pdf2docx==0.5.6
+PyMuPDF>=1.23.8
+tabula-py>=2.9.0
+pandas>=2.2.0
+openpyxl>=3.1.0  # required by pandas to write .xlsx (PDF to Excel)
+Markdown>=3.5.2
+pyinstaller>=6.4.0
+Pillow>=10.2.0
--- a/src/core/compressor.py
+++ b/src/core/compressor.py
@@ -0,0 +1,15 @@
+import fitz
+
+class PDFCompressor:
+    @staticmethod
+    def compress_pdf(input_path, output_path):
+        """
+        压缩PDF文件
+        :param input_path: 输入文件路径
+        :param output_path: 输出文件路径
+        """
+        doc = fitz.open(input_path)
+        try:
+            doc.save(output_path, deflate=True, garbage=4)
+        finally:
+            doc.close()
--- a/src/core/converter.py
+++ b/src/core/converter.py
@@ -0,0 +1,48 @@
+import pdf2docx
+import fitz
+import markdown
+import pandas as pd
+import os
+
+class PDFConverter:
+    @staticmethod
+    def pdf_to_word(pdf_path, output_path):
+        converter = pdf2docx.Converter(pdf_path)
+        converter.convert(output_path)
+        converter.close()
+
+    @staticmethod
+    def pdf_to_images(pdf_path, output_dir):
+        doc = fitz.open(pdf_path)
+        for page_num in range(len(doc)):
+            page = doc.load_page(page_num)
+            pix = page.get_pixmap()
+            output_path = os.path.join(output_dir, f'page_{page_num + 1}.png')
+            pix.save(output_path)
+
+    @staticmethod
+    def pdf_to_markdown(pdf_path, output_path):
+        # 使用PyMuPDF提取文本并转换为Markdown
+        doc = fitz.open(pdf_path)
+        markdown_text = ""
+        for page in doc:
+            markdown_text += page.get_text()
+        
+        with open(output_path, 'w', encoding='utf-8') as f:
+            f.write(markdown_text)
+
+    @staticmethod
+    def pdf_to_excel(pdf_path, output_path):
+        """
+        将PDF转换为Excel
+        :param pdf_path: PDF文件路径
+        :param output_path: 输出文件路径
+        """
+        # 使用tabula-py提取表格数据
+        import tabula
+        df = tabula.read_pdf(pdf_path, pages='all')
+        if df:
+            df[0].to_excel(output_path, index=False)
+        else:
+            # 如果没有找到表格，创建一个空的Excel文件
+            pd.DataFrame().to_excel(output_path, index=False)
--- a/src/core/merger.py
+++ b/src/core/merger.py
@@ -0,0 +1,20 @@
+import fitz
+
+class PDFMerger:
+    @staticmethod
+    def merge_pdfs(pdf_paths, output_path):
+        """
+        合并多个PDF文件
+        :param pdf_paths: PDF文件路径列表
+        :param output_path: 输出文件路径
+        """
+        merged_pdf = fitz.open()
+        
+        try:
+            for pdf_path in pdf_paths:
+                with fitz.open(pdf_path) as pdf_doc:
+                    merged_pdf.insert_pdf(pdf_doc)
+            
+            merged_pdf.save(output_path)
+        finally:
+            merged_pdf.close()
--- a/src/core/splitter.py
+++ b/src/core/splitter.py
@@ -0,0 +1,22 @@
+import fitz
+import os
+
+class PDFSplitter:
+    @staticmethod
+    def split_pdf(pdf_path, output_dir):
+        """
+        拆分PDF文件
+        :param pdf_path: PDF文件路径
+        :param output_dir: 输出目录
+        """
+        doc = fitz.open(pdf_path)
+        
+        try:
+            for page_num in range(len(doc)):
+                output_pdf = fitz.open()
+                output_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)
+                output_path = os.path.join(output_dir, f'page_{page_num + 1}.pdf')
+                output_pdf.save(output_path)
+                output_pdf.close()
+        finally:
+            doc.close()
--- a/src/core/utils.py
+++ b/src/core/utils.py
@@ -0,0 +1,11 @@
+import os
+
+def ensure_dir(directory):
+    """确保目录存在，如果不存在则创建"""
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+
+def get_output_path(input_path, new_ext):
+    """根据输入文件路径生成输出文件路径"""
+    base = os.path.splitext(input_path)[0]
+    return f"{base}.{new_ext}"
--- a/src/main.py
+++ b/src/main.py
@@ -0,0 +1,14 @@
+import sys
+from PyQt6.QtWidgets import QApplication
+from ui.main_window import MainWindow
+from ui.styles import MAIN_STYLE
+
+def main():
+    app = QApplication(sys.argv)
+    app.setStyleSheet(MAIN_STYLE)
+    window = MainWindow()
+    window.show()
+    sys.exit(app.exec())
+
+if __name__ == "__main__":
+    main()
--- a/src/resources/create_icon.py
+++ b/src/resources/create_icon.py
@@ -0,0 +1,58 @@
+from PIL import Image, ImageDraw, ImageFont
+import os
+
+def create_app_icon():
+    # 创建一个512x512的图像（ICO推荐尺寸）
+    size = 512
+    image = Image.new('RGBA', (size, size), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(image)
+    
+    # 绘制圆形背景
+    background_color = (52, 152, 219)  # 使用漂亮的蓝色
+    draw.ellipse([10, 10, size-10, size-10], fill=background_color)
+    
+    # 绘制PDF文字
+    text_color = (255, 255, 255)
+    font_size = size // 4
+    try:
+        # 尝试使用Arial字体，如果系统没有则使用默认字体
+        font = ImageFont.truetype("arial.ttf", font_size)
+    except:
+        font = ImageFont.load_default()
+    
+    text = "PDF"
+    # 获取文字大小以居中显示
+    text_bbox = draw.textbbox((0, 0), text, font=font)
+    text_width = text_bbox[2] - text_bbox[0]
+    text_height = text_bbox[3] - text_bbox[1]
+    
+    x = (size - text_width) // 2
+    y = (size - text_height) // 2
+    draw.text((x, y), text, font=font, fill=text_color)
+    
+    # 保存不同尺寸的图标
+    icon_sizes = [(16, 16), (32, 32), (48, 48), (64, 64), (128, 128), (256, 256)]
+    icon_images = []
+    
+    for icon_size in icon_sizes:
+        resized_image = image.resize(icon_size, Image.Resampling.LANCZOS)
+        icon_images.append(resized_image)
+    
+    # 确保resources/icons目录存在
+    icons_dir = os.path.dirname(os.path.abspath(__file__))
+    icons_path = os.path.join(icons_dir, 'icons')
+    os.makedirs(icons_path, exist_ok=True)
+    
+    # 保存ICO文件
+    icon_path = os.path.join(icons_path, 'app.ico')
+    icon_images[0].save(
+        icon_path,
+        format='ICO',
+        sizes=icon_sizes,
+        append_images=icon_images[1:]
+    )
+    
+    print(f"图标已创建: {icon_path}")
+
+if __name__ == "__main__":
+    create_app_icon()
--- a/src/resources/icons/app.ico
+++ b/src/resources/icons/app.ico
--- a/src/ui/main_window.py
+++ b/src/ui/main_window.py
@@ -0,0 +1,309 @@
+from PyQt6.QtWidgets import (QMainWindow, QWidget, QVBoxLayout, 
+                           QPushButton, QLabel, QFileDialog, QTabWidget,
+                           QLineEdit, QMessageBox, QProgressBar)
+from PyQt6.QtCore import Qt
+from PyQt6.QtGui import QIcon
+from core.converter import PDFConverter
+from core.merger import PDFMerger
+from core.splitter import PDFSplitter
+from core.compressor import PDFCompressor
+import os
+
+class MainWindow(QMainWindow):
+    def __init__(self):
+        super().__init__()
+        self.setWindowTitle("PDF工具箱")
+        self.setMinimumSize(800, 600)
+        self.selected_files = []
+        self.setup_ui()
+
+    def setup_ui(self):
+        # 创建中心部件
+        central_widget = QWidget()
+        self.setCentralWidget(central_widget)
+        layout = QVBoxLayout(central_widget)
+
+        # 创建选项卡
+        tab_widget = QTabWidget()
+        
+        # 添加各功能选项卡
+        tab_widget.addTab(self.create_converter_tab(), "格式转换")
+        tab_widget.addTab(self.create_merger_tab(), "PDF合并")
+        tab_widget.addTab(self.create_splitter_tab(), "PDF拆分")
+        tab_widget.addTab(self.create_compressor_tab(), "PDF压缩")
+        # 删除水印选项卡
+        
+        layout.addWidget(tab_widget)
+
+    def create_converter_tab(self):
+        widget = QWidget()
+        layout = QVBoxLayout(widget)
+        
+        # 添加文件选择
+        select_file_btn = QPushButton("选择PDF文件")
+        select_file_btn.clicked.connect(self.select_pdf_file)
+        
+        # 添加文件路径显示
+        self.file_label = QLabel("未选择文件")
+        
+        # 添加进度条
+        self.progress_bar = QProgressBar()
+        self.progress_bar.setVisible(False)
+        
+        # 添加转换按钮
+        convert_to_word = QPushButton("转换为Word")
+        convert_to_word.clicked.connect(lambda: self.convert_pdf("word"))
+        
+        convert_to_image = QPushButton("转换为图片")
+        convert_to_image.clicked.connect(lambda: self.convert_pdf("image"))
+        
+        convert_to_markdown = QPushButton("转换为Markdown")
+        convert_to_markdown.clicked.connect(lambda: self.convert_pdf("markdown"))
+        
+        convert_to_excel = QPushButton("转换为Excel")
+        convert_to_excel.clicked.connect(lambda: self.convert_pdf("excel"))
+        
+        layout.addWidget(select_file_btn)
+        layout.addWidget(self.file_label)
+        layout.addWidget(self.progress_bar)
+        layout.addWidget(convert_to_word)
+        layout.addWidget(convert_to_image)
+        layout.addWidget(convert_to_markdown)
+        layout.addWidget(convert_to_excel)
+        
+        return widget
+
+    def select_pdf_file(self):
+        file_name, _ = QFileDialog.getOpenFileName(
+            self,
+            "选择PDF文件",
+            "",
+            "PDF文件 (*.pdf)"
+        )
+        if file_name:
+            self.selected_files = [file_name]
+            self.file_label.setText(f"已选择: {os.path.basename(file_name)}")
+
+    def convert_pdf(self, target_format):
+        if not self.selected_files:
+            QMessageBox.warning(self, "警告", "请先选择PDF文件！")
+            return
+            
+        try:
+            self.progress_bar.setVisible(True)
+            self.progress_bar.setValue(0)
+            
+            input_file = self.selected_files[0]
+            output_dir = os.path.dirname(input_file)
+            
+            if target_format == "word":
+                output_file = os.path.splitext(input_file)[0] + ".docx"
+                PDFConverter.pdf_to_word(input_file, output_file)
+            elif target_format == "image":
+                output_dir = os.path.splitext(input_file)[0] + "_images"
+                os.makedirs(output_dir, exist_ok=True)
+                PDFConverter.pdf_to_images(input_file, output_dir)
+            elif target_format == "markdown":
+                output_file = os.path.splitext(input_file)[0] + ".md"
+                PDFConverter.pdf_to_markdown(input_file, output_file)
+            elif target_format == "excel":
+                output_file = os.path.splitext(input_file)[0] + ".xlsx"
+                PDFConverter.pdf_to_excel(input_file, output_file)
+                
+            self.progress_bar.setValue(100)
+            QMessageBox.information(self, "成功", "转换完成！")
+            
+        except Exception as e:
+            QMessageBox.critical(self, "错误", f"转换失败：{str(e)}")
+        finally:
+            self.progress_bar.setVisible(False)
+
+    def create_merger_tab(self):
+        widget = QWidget()
+        layout = QVBoxLayout(widget)
+        
+        # 添加文件选择按钮和文件列表显示
+        select_files = QPushButton("选择PDF文件")
+        select_files.clicked.connect(self.select_multiple_pdfs)
+        
+        self.files_label = QLabel("未选择文件")
+        
+        # 添加进度条
+        self.merger_progress = QProgressBar()
+        self.merger_progress.setVisible(False)
+        
+        merge_button = QPushButton("合并PDF")
+        merge_button.clicked.connect(self.merge_pdfs)
+        
+        layout.addWidget(select_files)
+        layout.addWidget(self.files_label)
+        layout.addWidget(self.merger_progress)
+        layout.addWidget(merge_button)
+        
+        return widget
+    
+    def select_multiple_pdfs(self):
+        files, _ = QFileDialog.getOpenFileNames(
+            self,
+            "选择多个PDF文件",
+            "",
+            "PDF文件 (*.pdf)"
+        )
+        if files:
+            self.selected_files = files
+            self.files_label.setText(f"已选择 {len(files)} 个文件")
+    
+    def merge_pdfs(self):
+        if len(self.selected_files) < 2:
+            QMessageBox.warning(self, "警告", "请至少选择两个PDF文件！")
+            return
+            
+        try:
+            output_file, _ = QFileDialog.getSaveFileName(
+                self,
+                "保存合并后的PDF",
+                "",
+                "PDF文件 (*.pdf)"
+            )
+            
+            if output_file:
+                self.merger_progress.setVisible(True)
+                self.merger_progress.setValue(0)
+                
+                PDFMerger.merge_pdfs(self.selected_files, output_file)
+                
+                self.merger_progress.setValue(100)
+                QMessageBox.information(self, "成功", "PDF合并完成！")
+                
+        except Exception as e:
+            QMessageBox.critical(self, "错误", f"合并失败：{str(e)}")
+        finally:
+            self.merger_progress.setVisible(False)
+
+    def create_splitter_tab(self):
+        widget = QWidget()
+        layout = QVBoxLayout(widget)
+        
+        # 添加文件选择和拆分按钮
+        select_file = QPushButton("选择PDF文件")
+        select_file.clicked.connect(self.select_pdf_for_split)
+        
+        self.split_file_label = QLabel("未选择文件")
+        
+        # 添加进度条
+        self.split_progress = QProgressBar()
+        self.split_progress.setVisible(False)
+        
+        split_button = QPushButton("拆分PDF")
+        split_button.clicked.connect(self.split_pdf)
+        
+        layout.addWidget(select_file)
+        layout.addWidget(self.split_file_label)
+        layout.addWidget(self.split_progress)
+        layout.addWidget(split_button)
+        
+        return widget
+
+    def select_pdf_for_split(self):
+        file_name, _ = QFileDialog.getOpenFileName(
+            self,
+            "选择要拆分的PDF文件",
+            "",
+            "PDF文件 (*.pdf)"
+        )
+        if file_name:
+            self.split_file = file_name
+            self.split_file_label.setText(f"已选择: {os.path.basename(file_name)}")
+
+    def split_pdf(self):
+        if not hasattr(self, 'split_file'):
+            QMessageBox.warning(self, "警告", "请先选择PDF文件！")
+            return
+            
+        try:
+            output_dir = QFileDialog.getExistingDirectory(
+                self,
+                "选择保存目录"
+            )
+            
+            if output_dir:
+                self.split_progress.setVisible(True)
+                self.split_progress.setValue(0)
+                
+                PDFSplitter.split_pdf(self.split_file, output_dir)
+                
+                self.split_progress.setValue(100)
+                QMessageBox.information(self, "成功", "PDF拆分完成！")
+                
+        except Exception as e:
+            QMessageBox.critical(self, "错误", f"拆分失败：{str(e)}")
+        finally:
+            self.split_progress.setVisible(False)
+
+    def create_compressor_tab(self):
+        widget = QWidget()
+        layout = QVBoxLayout(widget)
+        
+        # 添加文件选择和压缩按钮
+        select_file = QPushButton("选择PDF文件")
+        select_file.clicked.connect(self.select_pdf_for_compress)
+        
+        self.compress_file_label = QLabel("未选择文件")
+        
+        # 添加进度条
+        self.compress_progress = QProgressBar()
+        self.compress_progress.setVisible(False)
+        
+        compress_button = QPushButton("压缩PDF")
+        compress_button.clicked.connect(self.compress_pdf)
+        
+        layout.addWidget(select_file)
+        layout.addWidget(self.compress_file_label)
+        layout.addWidget(self.compress_progress)
+        layout.addWidget(compress_button)
+        
+        return widget
+    
+    def select_pdf_for_compress(self):
+        file_name, _ = QFileDialog.getOpenFileName(
+            self,
+            "选择要压缩的PDF文件",
+            "",
+            "PDF文件 (*.pdf)"
+        )
+        if file_name:
+            self.compress_file = file_name
+            self.compress_file_label.setText(f"已选择: {os.path.basename(file_name)}")
+    
+    def compress_pdf(self):
+        if not hasattr(self, 'compress_file'):
+            QMessageBox.warning(self, "警告", "请先选择PDF文件！")
+            return
+            
+        try:
+            output_file, _ = QFileDialog.getSaveFileName(
+                self,
+                "保存压缩后的PDF",
+                "",
+                "PDF文件 (*.pdf)"
+            )
+            
+            if output_file:
+                self.compress_progress.setVisible(True)
+                self.compress_progress.setValue(0)
+                
+                PDFCompressor.compress_pdf(self.compress_file, output_file)
+                
+                self.compress_progress.setValue(100)
+                QMessageBox.information(self, "成功", "PDF压缩完成！")
+                
+        except Exception as e:
+            QMessageBox.critical(self, "错误", f"压缩失败：{str(e)}")
+        finally:
+            self.compress_progress.setVisible(False)
+
+    # 删除以下水印相关的方法:
+    # - create_watermark_tab
+    # - select_watermark_color
+    # - select_pdf_for_watermark
+    # - add_watermark
--- a/src/ui/styles.py
+++ b/src/ui/styles.py
@@ -0,0 +1,76 @@
+MAIN_STYLE = """
+QMainWindow {
+    background-color: #f5f6fa;
+}
+
+QPushButton {
+    background-color: #3498db;
+    color: white;
+    border: none;
+    padding: 12px 24px;
+    border-radius: 6px;
+    font-size: 14px;
+    font-weight: bold;
+    margin: 8px;
+    min-width: 200px;
+}
+
+QPushButton:hover {
+    background-color: #2980b9;
+    transition: background-color 0.3s;
+}
+
+QPushButton:pressed {
+    background-color: #2471a3;
+}
+
+QLineEdit {
+    padding: 10px;
+    border: 2px solid #bdc3c7;
+    border-radius: 6px;
+    background-color: white;
+    font-size: 14px;
+    margin: 8px;
+}
+
+QLineEdit:focus {
+    border-color: #3498db;
+}
+
+QTabWidget::pane {
+    border: none;
+    background-color: white;
+    border-radius: 8px;
+    margin-top: -1px;
+}
+
+QTabBar::tab {
+    background-color: #ecf0f1;
+    color: #34495e;
+    padding: 12px 24px;
+    margin-right: 2px;
+    border-top-left-radius: 6px;
+    border-top-right-radius: 6px;
+    font-weight: bold;
+}
+
+QTabBar::tab:selected {
+    background-color: white;
+    color: #2980b9;
+    border-bottom: 3px solid #3498db;
+}
+
+QTabBar::tab:hover:!selected {
+    background-color: #bdc3c7;
+}
+
+QWidget {
+    font-family: "Microsoft YaHei", "Segoe UI", sans-serif;
+}
+
+QLabel {
+    color: #2c3e50;
+    font-size: 14px;
+    margin: 8px;
+}
+"""