commit 0a92efb96a19434218009f35cae79cc8fba53872 Author: Snowz <372492339@qq.com> Date: Sat Apr 26 14:32:43 2025 +0800 支持批量转换 diff --git a/README.md b/README.md new file mode 100644 index 0000000..25b9ea1 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# PDF工具箱 + +一个简单易用的PDF处理工具,支持多种格式转换和PDF文件处理功能。 + +## 功能特性 + +- PDF转Word:将PDF文件转换为Word文档 +- PDF转图片:将PDF文件转换为图片文件 +- PDF转Markdown:提取PDF文本内容并转换为Markdown格式 +- PDF转Excel:将PDF中的表格数据提取为Excel文件 +- PDF合并:将多个PDF文件合并为一个文件 +- PDF拆分:将PDF文件拆分为多个单页文件 +- PDF压缩:压缩PDF文件大小 + +## 环境要求 + +- Python 3.11 或更高版本 +- 所需Python包已在 requirements.txt 中列出 +- 对于PDF转Excel功能,需要安装Java环境: + - 安装Java JDK(推荐Java 8或更高版本) + - 配置JAVA_HOME环境变量 + - 将Java添加到系统PATH中 + +### Java环境配置步骤(仅PDF转Excel功能需要) + +1. 下载并安装Java JDK + - 访问 [Oracle Java下载页面](https://www.oracle.com/java/technologies/downloads/) 或使用OpenJDK + - 选择适合您系统的Java版本进行下载和安装 + +2. 配置环境变量 + - 右键"此电脑" -> "属性" -> "高级系统设置" -> "环境变量" + - 在系统变量中新建 JAVA_HOME,值为Java安装目录(如:C:\Program Files\Java\jdk-x.x.x) + - 在系统变量Path中添加 %JAVA_HOME%\bin + +3. 验证安装 + - 打开命令提示符 + - 输入 `java -version` + - 如果显示版本信息,说明配置成功 + +## 安装步骤 + +1. 克隆或下载本项目 +2. 安装依赖包: +```bash +pip install -r requirements.txt +``` + +## 打包说明 +如果您想将程序打包成可执行文件,可以使用PyInstaller进行打包: + +1. 首先安装PyInstaller: +```bash +pip install pyinstaller +``` + +2. 进入项目目录,运行以下命令进行打包: +```bash +pyinstaller --onefile --windowed --add-data "src;src" --add-data "config.json;." --add-data "requirements.txt;." main.py +``` + +3. 
# ===== src/core/compressor.py =====
class PDFCompressor:
    """Shrink PDF files by re-saving them with PyMuPDF."""

    @staticmethod
    def compress_pdf(input_path, output_path):
        """
        Write a compressed copy of a PDF file.

        The document is re-saved with stream deflation (``deflate=True``)
        and PyMuPDF garbage collection level 4 (``garbage=4``).

        :param input_path: path of the PDF to compress
        :param output_path: path of the compressed PDF to create
        :raises ValueError: if ``output_path`` refers to the same file as
            ``input_path``
        """
        # Imported locally so the block does not depend on file-level imports.
        import os

        def _canonical(path):
            # normcase makes the comparison case-insensitive on Windows.
            return os.path.normcase(os.path.abspath(path))

        # PyMuPDF rejects a non-incremental save onto the file it has open
        # ("save to original must be incremental"). Fail early with a message
        # the user can act on instead of surfacing that internal error.
        if _canonical(input_path) == _canonical(output_path):
            raise ValueError("输出文件不能与输入文件相同,请选择其他保存路径")

        import fitz  # PyMuPDF

        # The context manager closes the document even when save() raises.
        with fitz.open(input_path) as doc:
            doc.save(output_path, deflate=True, garbage=4)
# This span of the commit covers four files of the ``core`` package; each
# section below is the content of the file named in its banner.
# Third-party backends (fitz, pdf2docx, tabula, pandas) are imported inside
# the functions that use them, so a missing optional backend (for example
# tabula, which needs Java) only affects the feature that requires it.

# ===== src/core/converter.py =====
import os


class PDFConverter:
    """Convert PDF files to Word, PNG images, Markdown text or Excel."""

    @staticmethod
    def batch_convert(pdf_paths, output_dir, convert_type):
        """
        Convert several PDF files in one go.

        A failure on one file does not stop the batch: the error is printed,
        recorded, and the next file is processed.

        :param pdf_paths: list of PDF file paths
        :param output_dir: directory receiving the results (created if needed)
        :param convert_type: one of 'word', 'images', 'markdown', 'excel'
        :return: list of ``(pdf_path, error_message)`` tuples for the files
            that could not be converted; empty when everything succeeded
        :raises ValueError: if ``convert_type`` is not a supported type
        """
        # Types that produce exactly one output file per PDF.
        single_file_targets = {
            'word': ('.docx', PDFConverter.pdf_to_word),
            'markdown': ('.md', PDFConverter.pdf_to_markdown),
            'excel': ('.xlsx', PDFConverter.pdf_to_excel),
        }
        if convert_type != 'images' and convert_type not in single_file_targets:
            # Previously an unknown type was a silent no-op that the caller
            # reported as a successful conversion.
            raise ValueError(f"不支持的转换类型: {convert_type}")

        os.makedirs(output_dir, exist_ok=True)

        failures = []
        for pdf_path in pdf_paths:
            filename = os.path.basename(pdf_path)
            stem = os.path.splitext(filename)[0]

            try:
                if convert_type == 'images':
                    # Each PDF gets its own sub-directory of page images.
                    pages_dir = os.path.join(output_dir, stem)
                    os.makedirs(pages_dir, exist_ok=True)
                    PDFConverter.pdf_to_images(pdf_path, pages_dir)
                else:
                    suffix, convert = single_file_targets[convert_type]
                    convert(pdf_path, os.path.join(output_dir, stem + suffix))
            except Exception as e:
                # Best effort per file: keep going, but remember the failure
                # so the caller can tell the user about it.
                print(f"转换文件 {filename} 时出错: {str(e)}")
                failures.append((pdf_path, str(e)))

        return failures

    @staticmethod
    def pdf_to_word(pdf_path, output_path):
        """
        Convert one PDF to a .docx file using pdf2docx.

        :param pdf_path: PDF file path
        :param output_path: path of the .docx file to create
        """
        import pdf2docx

        converter = pdf2docx.Converter(pdf_path)
        try:
            converter.convert(output_path)
        finally:
            # Release the source document even when the conversion fails.
            converter.close()

    @staticmethod
    def pdf_to_images(pdf_path, output_dir):
        """
        Render every page of a PDF to ``page_<n>.png`` (1-based) in a directory.

        :param pdf_path: PDF file path
        :param output_dir: existing directory receiving the PNG files
        """
        import fitz  # PyMuPDF

        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc, start=1):
                target = os.path.join(output_dir, f'page_{page_number}.png')
                page.get_pixmap().save(target)

    @staticmethod
    def pdf_to_markdown(pdf_path, output_path):
        """
        Extract the plain text of every page and write it to a UTF-8 file.

        :param pdf_path: PDF file path
        :param output_path: path of the text/Markdown file to create
        """
        import fitz  # PyMuPDF

        with fitz.open(pdf_path) as doc:
            markdown_text = "".join(page.get_text() for page in doc)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(markdown_text)

    @staticmethod
    def pdf_to_excel(pdf_path, output_path):
        """
        Extract the tables of a PDF into an Excel workbook using tabula-py.

        Every detected table is written to its own sheet ("Sheet1",
        "Sheet2", ...). When no table is found an empty workbook is created.

        :param pdf_path: PDF file path
        :param output_path: path of the .xlsx file to create
        """
        import pandas as pd
        import tabula

        tables = tabula.read_pdf(pdf_path, pages='all')
        if not tables:
            pd.DataFrame().to_excel(output_path, index=False)
            return

        # Previously only tables[0] was saved and the rest were dropped.
        # The first table still lands on "Sheet1" as before.
        with pd.ExcelWriter(output_path) as writer:
            for index, table in enumerate(tables, start=1):
                table.to_excel(writer, sheet_name=f"Sheet{index}", index=False)


# ===== src/core/merger.py =====
class PDFMerger:
    """Concatenate PDF files with PyMuPDF."""

    @staticmethod
    def merge_pdfs(pdf_paths, output_path):
        """
        Merge several PDF files, in the given order, into one file.

        :param pdf_paths: list of PDF file paths
        :param output_path: path of the merged PDF to create
        :raises ValueError: if ``pdf_paths`` is empty
        """
        if not pdf_paths:
            # An empty document cannot be saved; say so explicitly instead of
            # failing later inside PyMuPDF.
            raise ValueError("没有可合并的PDF文件")

        import fitz  # PyMuPDF

        with fitz.open() as merged_pdf:
            for pdf_path in pdf_paths:
                with fitz.open(pdf_path) as pdf_doc:
                    merged_pdf.insert_pdf(pdf_doc)

            merged_pdf.save(output_path)


# ===== src/core/splitter.py =====
class PDFSplitter:
    """Split a PDF into single-page files with PyMuPDF."""

    @staticmethod
    def split_pdf(pdf_path, output_dir):
        """
        Write every page of a PDF to ``page_<n>.pdf`` (1-based) in a directory.

        :param pdf_path: PDF file path
        :param output_dir: directory receiving the pages (created if needed)
        """
        import fitz  # PyMuPDF

        with fitz.open(pdf_path) as doc:
            os.makedirs(output_dir, exist_ok=True)
            for page_index in range(len(doc)):
                # The inner context manager closes the one-page document even
                # if insert_pdf() or save() raises.
                with fitz.open() as single_page:
                    single_page.insert_pdf(
                        doc, from_page=page_index, to_page=page_index
                    )
                    single_page.save(
                        os.path.join(output_dir, f'page_{page_index + 1}.pdf')
                    )


# ===== src/core/utils.py =====
def ensure_dir(directory):
    """Create ``directory`` (and missing parents) unless it already exists."""
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(directory, exist_ok=True)


def get_output_path(input_path, new_ext):
    """
    Return ``input_path`` with its extension replaced by ``new_ext``.

    ``new_ext`` may be given with or without a leading dot
    ("docx" and ".docx" both yield "<base>.docx").
    """
    base = os.path.splitext(input_path)[0]
    return f"{base}.{new_ext.lstrip('.')}"
# This span of the commit covers four files; each section below is the
# content of the file named in its banner.

# ===== src/main.py =====
import sys
from PyQt6.QtWidgets import QApplication
from ui.main_window import MainWindow
from ui.styles import MAIN_STYLE


def main():
    """Start the Qt application, show the main window, exit with Qt's code."""
    app = QApplication(sys.argv)
    app.setStyleSheet(MAIN_STYLE)
    window = MainWindow()
    window.show()
    sys.exit(app.exec())


if __name__ == "__main__":
    main()


# ===== src/resources/create_icon.py =====
from PIL import Image, ImageDraw, ImageFont
import os


def create_app_icon():
    """
    Draw the application icon and save it as ``icons/app.ico``.

    The icon is a blue disc with a white "PDF" label. The ``icons`` directory
    is created next to this script if it does not exist, and the resulting
    file contains one frame for each of 16, 32, 48, 64, 128 and 256 pixels.
    """
    # Draw on a large master canvas; the ICO frames are downscaled from it.
    size = 512
    image = Image.new('RGBA', (size, size), (0, 0, 0, 0))
    draw = ImageDraw.Draw(image)

    # Circular background.
    background_color = (52, 152, 219)
    draw.ellipse([10, 10, size - 10, size - 10], fill=background_color)

    # "PDF" label.
    text_color = (255, 255, 255)
    font_size = size // 4
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not installed: use Pillow's bundled font at the same size
        # (the size argument needs Pillow >= 10.1; requirements.txt asks for
        # >= 10.2) so the label stays legible.
        font = ImageFont.load_default(size=font_size)
    except ImportError:
        # Pillow built without FreeType: only the small bitmap font exists.
        font = ImageFont.load_default()

    text = "PDF"
    # textbbox() reports where the ink would land for a draw at (0, 0); the
    # box usually does not start at the origin, so subtract its offset to
    # centre the visible glyphs rather than the drawing origin.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (size - (right - left)) // 2 - left
    y = (size - (bottom - top)) // 2 - top
    draw.text((x, y), text, font=font, fill=text_color)

    # Make sure resources/icons exists.
    icons_dir = os.path.dirname(os.path.abspath(__file__))
    icons_path = os.path.join(icons_dir, 'icons')
    os.makedirs(icons_path, exist_ok=True)

    # Save from the 512px master. Pillow's ICO writer skips every requested
    # size that is larger than the image being saved, so saving from the
    # 16x16 frame (as before) produced an icon with a single 16x16 entry.
    icon_sizes = [(16, 16), (32, 32), (48, 48), (64, 64), (128, 128), (256, 256)]
    icon_path = os.path.join(icons_path, 'app.ico')
    image.save(icon_path, format='ICO', sizes=icon_sizes)

    print(f"图标已创建: {icon_path}")


if __name__ == "__main__":
    create_app_icon()


# ===== src/ui/main_window.py =====
from PyQt6.QtWidgets import (QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
                             QPushButton, QLabel, QFileDialog, QTabWidget,
                             QLineEdit, QMessageBox, QProgressBar, QListWidget)
from PyQt6.QtCore import Qt
from PyQt6.QtGui import QIcon
from core.converter import PDFConverter
from core.merger import PDFMerger
from core.splitter import PDFSplitter
from core.compressor import PDFCompressor
import os


class MainWindow(QMainWindow):
    """Main window of the PDF toolbox: one tab per tool.

    Tabs: format conversion, merge, split, compress. Each tab keeps its own
    selection state so that choosing files in one tab never changes what
    another tab operates on.
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("PDF工具箱")
        self.setMinimumSize(800, 600)
        # All per-tab state is created here so handlers can test values
        # directly instead of probing with hasattr().
        self.selected_files = []      # converter tab: input PDFs
        self.output_directory = None  # converter tab: output directory
        self.merge_files = []         # merger tab: input PDFs
        self.split_file = None        # splitter tab: input PDF
        self.compress_file = None     # compressor tab: input PDF
        self.setup_ui()

    def setup_ui(self):
        """Create the central tab widget and add one tab per tool."""
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout(central_widget)

        tab_widget = QTabWidget()
        tab_widget.addTab(self.create_converter_tab(), "格式转换")
        tab_widget.addTab(self.create_merger_tab(), "PDF合并")
        tab_widget.addTab(self.create_splitter_tab(), "PDF拆分")
        tab_widget.addTab(self.create_compressor_tab(), "PDF压缩")

        layout.addWidget(tab_widget)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _build_simple_tab(self, select_text, on_select, action_text, on_action):
        """Build a tab made of: pick button, status label, progress bar, action button.

        :return: ``(tab widget, status label, progress bar)``
        """
        widget = QWidget()
        layout = QVBoxLayout(widget)

        select_button = QPushButton(select_text)
        select_button.clicked.connect(on_select)

        status_label = QLabel("未选择文件")

        progress = QProgressBar()
        progress.setVisible(False)

        action_button = QPushButton(action_text)
        action_button.clicked.connect(on_action)

        for item in (select_button, status_label, progress, action_button):
            layout.addWidget(item)

        return widget, status_label, progress

    def _run_task(self, progress_bar, task, success_message, failure_prefix):
        """Run ``task()`` with ``progress_bar`` shown and report the outcome.

        Success shows an information box with ``success_message``; any
        exception shows an error box "<failure_prefix>:<error>". The
        progress bar is hidden again in both cases.
        """
        try:
            progress_bar.setVisible(True)
            progress_bar.setValue(0)

            task()

            progress_bar.setValue(100)
            QMessageBox.information(self, "成功", success_message)
        except Exception as e:
            QMessageBox.critical(self, "错误", f"{failure_prefix}:{str(e)}")
        finally:
            progress_bar.setVisible(False)

    # ------------------------------------------------------------------
    # Format conversion tab
    # ------------------------------------------------------------------
    def create_converter_tab(self):
        """Build the batch conversion tab and return its widget."""
        widget = QWidget()
        layout = QVBoxLayout(widget)

        select_file_btn = QPushButton("选择PDF文件(可多选)")
        select_file_btn.clicked.connect(self.select_pdf_files)

        self.files_list_widget = QListWidget()

        output_dir_layout = QHBoxLayout()
        self.output_dir_label = QLabel("输出目录:未选择")
        select_output_dir_btn = QPushButton("选择输出目录")
        select_output_dir_btn.clicked.connect(self.select_output_directory)
        output_dir_layout.addWidget(self.output_dir_label)
        output_dir_layout.addWidget(select_output_dir_btn)

        self.progress_bar = QProgressBar()
        self.progress_bar.setVisible(False)

        layout.addWidget(select_file_btn)
        layout.addWidget(self.files_list_widget)
        layout.addLayout(output_dir_layout)
        layout.addWidget(self.progress_bar)

        conversions = (
            ("批量转换为Word", "word"),
            ("批量转换为图片", "images"),
            ("批量转换为Markdown", "markdown"),
            ("批量转换为Excel", "excel"),
        )
        for caption, convert_type in conversions:
            button = QPushButton(caption)
            # Bind convert_type as a default argument: a plain closure would
            # see only the last value of the loop variable. The first
            # parameter absorbs the "checked" flag emitted by clicked.
            button.clicked.connect(
                lambda _checked=False, kind=convert_type: self.batch_convert_files(kind)
            )
            layout.addWidget(button)

        return widget

    def select_pdf_files(self):
        """Let the user pick the PDFs to convert and list their names."""
        files, _ = QFileDialog.getOpenFileNames(
            self,
            "选择PDF文件",
            "",
            "PDF文件 (*.pdf)"
        )
        if files:
            self.selected_files = files
            self.files_list_widget.clear()
            for file in files:
                self.files_list_widget.addItem(os.path.basename(file))

    def select_output_directory(self):
        """Let the user pick the directory that receives converted files."""
        output_dir = QFileDialog.getExistingDirectory(
            self,
            "选择输出目录"
        )
        if output_dir:
            self.output_directory = output_dir
            self.output_dir_label.setText(f"输出目录:{output_dir}")

    def batch_convert_files(self, convert_type):
        """Convert the selected PDFs and tell the user which ones failed.

        :param convert_type: 'word', 'images', 'markdown' or 'excel'
        """
        if not self.selected_files:
            QMessageBox.warning(self, "警告", "请先选择PDF文件!")
            return

        if not self.output_directory:
            QMessageBox.warning(self, "警告", "请选择输出目录!")
            return

        try:
            self.progress_bar.setVisible(True)
            self.progress_bar.setValue(0)

            # batch_convert handles per-file errors itself. "or []" keeps
            # this working with a converter that returns None instead of a
            # list of (path, error) failures.
            failures = PDFConverter.batch_convert(
                self.selected_files, self.output_directory, convert_type
            ) or []

            self.progress_bar.setValue(100)
            if failures:
                details = "\n".join(
                    f"{os.path.basename(path)}: {reason}"
                    for path, reason in failures
                )
                QMessageBox.warning(self, "警告", f"以下文件转换失败:\n{details}")
            else:
                QMessageBox.information(self, "成功", "批量转换完成!")

        except Exception as e:
            QMessageBox.critical(self, "错误", f"转换失败:{str(e)}")
        finally:
            self.progress_bar.setVisible(False)

    # ------------------------------------------------------------------
    # Merge tab
    # ------------------------------------------------------------------
    def create_merger_tab(self):
        """Build the merge tab and return its widget."""
        widget, self.files_label, self.merger_progress = self._build_simple_tab(
            "选择PDF文件", self.select_multiple_pdfs,
            "合并PDF", self.merge_pdfs,
        )
        return widget

    def select_multiple_pdfs(self):
        """Let the user pick the PDFs to merge."""
        files, _ = QFileDialog.getOpenFileNames(
            self,
            "选择多个PDF文件",
            "",
            "PDF文件 (*.pdf)"
        )
        if files:
            # Stored separately from the converter tab's selection so the
            # two tabs do not overwrite each other's files.
            self.merge_files = files
            self.files_label.setText(f"已选择 {len(files)} 个文件")

    def merge_pdfs(self):
        """Ask for a target file and merge the selected PDFs into it."""
        if len(self.merge_files) < 2:
            QMessageBox.warning(self, "警告", "请至少选择两个PDF文件!")
            return

        output_file, _ = QFileDialog.getSaveFileName(
            self,
            "保存合并后的PDF",
            "",
            "PDF文件 (*.pdf)"
        )
        if not output_file:
            return  # dialog cancelled

        self._run_task(
            self.merger_progress,
            lambda: PDFMerger.merge_pdfs(self.merge_files, output_file),
            "PDF合并完成!",
            "合并失败",
        )

    # ------------------------------------------------------------------
    # Split tab
    # ------------------------------------------------------------------
    def create_splitter_tab(self):
        """Build the split tab and return its widget."""
        widget, self.split_file_label, self.split_progress = self._build_simple_tab(
            "选择PDF文件", self.select_pdf_for_split,
            "拆分PDF", self.split_pdf,
        )
        return widget

    def select_pdf_for_split(self):
        """Let the user pick the PDF to split."""
        file_name, _ = QFileDialog.getOpenFileName(
            self,
            "选择要拆分的PDF文件",
            "",
            "PDF文件 (*.pdf)"
        )
        if file_name:
            self.split_file = file_name
            self.split_file_label.setText(f"已选择: {os.path.basename(file_name)}")

    def split_pdf(self):
        """Ask for a target directory and split the selected PDF into pages."""
        if not self.split_file:
            QMessageBox.warning(self, "警告", "请先选择PDF文件!")
            return

        output_dir = QFileDialog.getExistingDirectory(
            self,
            "选择保存目录"
        )
        if not output_dir:
            return  # dialog cancelled

        self._run_task(
            self.split_progress,
            lambda: PDFSplitter.split_pdf(self.split_file, output_dir),
            "PDF拆分完成!",
            "拆分失败",
        )

    # ------------------------------------------------------------------
    # Compress tab
    # ------------------------------------------------------------------
    def create_compressor_tab(self):
        """Build the compress tab and return its widget."""
        widget, self.compress_file_label, self.compress_progress = self._build_simple_tab(
            "选择PDF文件", self.select_pdf_for_compress,
            "压缩PDF", self.compress_pdf,
        )
        return widget

    def select_pdf_for_compress(self):
        """Let the user pick the PDF to compress."""
        file_name, _ = QFileDialog.getOpenFileName(
            self,
            "选择要压缩的PDF文件",
            "",
            "PDF文件 (*.pdf)"
        )
        if file_name:
            self.compress_file = file_name
            self.compress_file_label.setText(f"已选择: {os.path.basename(file_name)}")

    def compress_pdf(self):
        """Ask for a target file and write a compressed copy of the PDF."""
        if not self.compress_file:
            QMessageBox.warning(self, "警告", "请先选择PDF文件!")
            return

        output_file, _ = QFileDialog.getSaveFileName(
            self,
            "保存压缩后的PDF",
            "",
            "PDF文件 (*.pdf)"
        )
        if not output_file:
            return  # dialog cancelled

        self._run_task(
            self.compress_progress,
            lambda: PDFCompressor.compress_pdf(self.compress_file, output_file),
            "PDF压缩完成!",
            "压缩失败",
        )


# ===== src/ui/styles.py =====
# Application-wide Qt style sheet. Qt Style Sheets do not implement the CSS
# "transition" property, so it is not used here (the hover colour change is
# immediate).
MAIN_STYLE = """
QMainWindow {
    background-color: #f5f6fa;
}

QPushButton {
    background-color: #3498db;
    color: white;
    border: none;
    padding: 12px 24px;
    border-radius: 6px;
    font-size: 14px;
    font-weight: bold;
    margin: 8px;
    min-width: 200px;
}

QPushButton:hover {
    background-color: #2980b9;
}

QPushButton:pressed {
    background-color: #2471a3;
}

QLineEdit {
    padding: 10px;
    border: 2px solid #bdc3c7;
    border-radius: 6px;
    background-color: white;
    font-size: 14px;
    margin: 8px;
}

QLineEdit:focus {
    border-color: #3498db;
}

QTabWidget::pane {
    border: none;
    background-color: white;
    border-radius: 8px;
    margin-top: -1px;
}

QTabBar::tab {
    background-color: #ecf0f1;
    color: #34495e;
    padding: 12px 24px;
    margin-right: 2px;
    border-top-left-radius: 6px;
    border-top-right-radius: 6px;
    font-weight: bold;
}

QTabBar::tab:selected {
    background-color: white;
    color: #2980b9;
    border-bottom: 3px solid #3498db;
}

QTabBar::tab:hover:!selected {
    background-color: #bdc3c7;
}

QWidget {
    font-family: "Microsoft YaHei", "Segoe UI", sans-serif;
}

QLabel {
    color: #2c3e50;
    font-size: 14px;
    margin: 8px;
}
"""