From 0a92efb96a19434218009f35cae79cc8fba53872 Mon Sep 17 00:00:00 2001 From: Snowz <372492339@qq.com> Date: Sat, 26 Apr 2025 14:32:43 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=89=B9=E9=87=8F=E8=BD=AC?= =?UTF-8?q?=E6=8D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 74 ++++++++ project_structure.txt | 21 +++ requirements.txt | 8 + src/core/compressor.py | 15 ++ src/core/converter.py | 81 +++++++++ src/core/merger.py | 20 +++ src/core/splitter.py | 22 +++ src/core/utils.py | 11 ++ src/main.py | 14 ++ src/resources/create_icon.py | 58 +++++++ src/resources/icons/app.ico | Bin 0 -> 741 bytes src/ui/main_window.py | 322 +++++++++++++++++++++++++++++++++++ src/ui/styles.py | 76 +++++++++ 13 files changed, 722 insertions(+) create mode 100644 README.md create mode 100644 project_structure.txt create mode 100644 requirements.txt create mode 100644 src/core/compressor.py create mode 100644 src/core/converter.py create mode 100644 src/core/merger.py create mode 100644 src/core/splitter.py create mode 100644 src/core/utils.py create mode 100644 src/main.py create mode 100644 src/resources/create_icon.py create mode 100644 src/resources/icons/app.ico create mode 100644 src/ui/main_window.py create mode 100644 src/ui/styles.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..25b9ea1 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# PDF工具箱 + +一个简单易用的PDF处理工具,支持多种格式转换和PDF文件处理功能。 + +## 功能特性 + +- PDF转Word:将PDF文件转换为Word文档 +- PDF转图片:将PDF文件转换为图片文件 +- PDF转Markdown:提取PDF文本内容并转换为Markdown格式 +- PDF转Excel:将PDF中的表格数据提取为Excel文件 +- PDF合并:将多个PDF文件合并为一个文件 +- PDF拆分:将PDF文件拆分为多个单页文件 +- PDF压缩:压缩PDF文件大小 + +## 环境要求 + +- Python 3.11 或更高版本 +- 所需Python包已在 requirements.txt 中列出 +- 对于PDF转Excel功能,需要安装Java环境: + - 安装Java JDK(推荐Java 8或更高版本) + - 配置JAVA_HOME环境变量 + - 将Java添加到系统PATH中 + +### Java环境配置步骤(仅PDF转Excel功能需要) + +1. 下载并安装Java JDK + - 访问 [Oracle Java下载页面](https://www.oracle.com/java/technologies/downloads/) 或使用OpenJDK + - 选择适合您系统的Java版本进行下载和安装 + +2. 配置环境变量 + - 右键"此电脑" -> "属性" -> "高级系统设置" -> "环境变量" + - 在系统变量中新建 JAVA_HOME,值为Java安装目录(如:C:\Program Files\Java\jdk-x.x.x) + - 在系统变量Path中添加 %JAVA_HOME%\bin + +3. 验证安装 + - 打开命令提示符 + - 输入 `java -version` + - 如果显示版本信息,说明配置成功 + +## 安装步骤 + +1. 克隆或下载本项目 +2. 安装依赖包: +```bash +pip install -r requirements.txt +``` + +## 打包说明 +如果您想将程序打包成可执行文件,可以使用PyInstaller进行打包: + +1. 首先安装PyInstaller: +```bash +pip install pyinstaller +``` + +2. 进入项目目录,运行以下命令进行打包: +```bash +pyinstaller --onefile --windowed --add-data "src;src" --add-data "config.json;." --add-data "requirements.txt;." main.py +``` + +3. 打包完成后,在dist目录下会生成可执行文件。 +打包完成后,可执行文件将在 dist/PDF工具箱 目录中生成。 +注意事项: +- 打包前请确保已安装所有依赖包 +- 如果使用PDF转Excel功能,打包后的程序仍然需要Java环境 +- 首次运行打包后的程序可能需要稍等片刻 + +## 使用说明 + +打包命令的参数说明: +- `--name "PDF工具箱"`: 设置生成的程序名称 +- `--windowed`: 不显示控制台窗口 +- `--icon=assets/icon.ico`: 设置程序图标(如果有的话) +- `--add-data "assets;assets"`: 包含资源文件(如果有的话) \ No newline at end of file diff --git a/project_structure.txt b/project_structure.txt new file mode 100644 index 0000000..c51412d --- /dev/null +++ b/project_structure.txt @@ -0,0 +1,21 @@ +pdf_toolbox/ +├── src/ +│ ├── __init__.py +│ ├── main.py # 主程序入口 +│ ├── ui/ +│ │ ├── __init__.py +│ │ ├── main_window.py # 主窗口UI +│ │ └── styles.py # UI样式 +│ ├── core/ +│ │ ├── __init__.py +│ │ ├── converter.py # 格式转换 +│ │ ├── merger.py # PDF合并 +│ │ ├── splitter.py # PDF拆分 +│ │ ├── compressor.py # PDF压缩 +│ │ ├── watermark.py # 水印处理 +│ │ └── utils.py # 工具函数 +│ └── resources/ +│ ├── icons/ # 图标资源 +│ └── styles/ # 样式资源 +├── requirements.txt # 依赖包 +└── README.md # 项目说明 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ab2e63f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +PyQt6>=6.6.1 +pdf2docx==0.5.6 +PyMuPDF>=1.23.8 +tabula-py>=2.9.0 +pandas>=2.2.0 +Markdown>=3.5.2 +pyinstaller>=6.4.0 +Pillow>=10.2.0 \ No newline at end of file diff --git a/src/core/compressor.py b/src/core/compressor.py new file mode 100644 index 0000000..a7f695f --- /dev/null +++ b/src/core/compressor.py @@ -0,0 +1,15 @@ +import fitz + +class PDFCompressor: + @staticmethod + def compress_pdf(input_path, output_path): + """ + 压缩PDF文件 + :param input_path: 输入文件路径 + :param output_path: 输出文件路径 + """ + doc = fitz.open(input_path) + try: + doc.save(output_path, deflate=True, garbage=4) + finally: + doc.close() \ No newline at end of file diff --git a/src/core/converter.py b/src/core/converter.py new file mode 100644 index 0000000..8cac042 --- /dev/null +++ b/src/core/converter.py @@ -0,0 +1,81 @@ +import pdf2docx +import fitz +import markdown +import pandas as pd +import os + +class PDFConverter: + @staticmethod + def batch_convert(pdf_paths, output_dir, convert_type): + """ + 批量转换PDF文件 + :param pdf_paths: PDF文件路径列表 + :param output_dir: 输出目录 + :param convert_type: 转换类型 ('word', 'images', 'markdown', 'excel') + """ + os.makedirs(output_dir, exist_ok=True) + + for pdf_path in pdf_paths: + filename = os.path.basename(pdf_path) + name_without_ext = os.path.splitext(filename)[0] + + try: + if convert_type == 'word': + output_path = os.path.join(output_dir, f"{name_without_ext}.docx") + PDFConverter.pdf_to_word(pdf_path, output_path) + elif convert_type == 'images': + # 为每个PDF创建单独的图片目录 + pdf_images_dir = os.path.join(output_dir, name_without_ext) + os.makedirs(pdf_images_dir, exist_ok=True) + PDFConverter.pdf_to_images(pdf_path, pdf_images_dir) + elif convert_type == 'markdown': + output_path = os.path.join(output_dir, f"{name_without_ext}.md") + PDFConverter.pdf_to_markdown(pdf_path, output_path) + elif convert_type == 'excel': + output_path = os.path.join(output_dir, f"{name_without_ext}.xlsx") + PDFConverter.pdf_to_excel(pdf_path, output_path) + except Exception as e: + print(f"转换文件 {filename} 时出错: {str(e)}") + continue + + @staticmethod + def pdf_to_word(pdf_path, output_path): + converter = pdf2docx.Converter(pdf_path) + converter.convert(output_path) + converter.close() + + @staticmethod + def pdf_to_images(pdf_path, output_dir): + doc = fitz.open(pdf_path) + for page_num in range(len(doc)): + page = doc.load_page(page_num) + pix = page.get_pixmap() + output_path = os.path.join(output_dir, f'page_{page_num + 1}.png') + pix.save(output_path) + + @staticmethod + def pdf_to_markdown(pdf_path, output_path): + # 使用PyMuPDF提取文本并转换为Markdown + doc = fitz.open(pdf_path) + markdown_text = "" + for page in doc: + markdown_text += page.get_text() + + with open(output_path, 'w', encoding='utf-8') as f: + f.write(markdown_text) + + @staticmethod + def pdf_to_excel(pdf_path, output_path): + """ + 将PDF转换为Excel + :param pdf_path: PDF文件路径 + :param output_path: 输出文件路径 + """ + # 使用tabula-py提取表格数据 + import tabula + df = tabula.read_pdf(pdf_path, pages='all') + if df: + df[0].to_excel(output_path, index=False) + else: + # 如果没有找到表格,创建一个空的Excel文件 + pd.DataFrame().to_excel(output_path, index=False) \ No newline at end of file diff --git a/src/core/merger.py b/src/core/merger.py new file mode 100644 index 0000000..2185d9d --- /dev/null +++ b/src/core/merger.py @@ -0,0 +1,20 @@ +import fitz + +class PDFMerger: + @staticmethod + def merge_pdfs(pdf_paths, output_path): + """ + 合并多个PDF文件 + :param pdf_paths: PDF文件路径列表 + :param output_path: 输出文件路径 + """ + merged_pdf = fitz.open() + + try: + for pdf_path in pdf_paths: + with fitz.open(pdf_path) as pdf_doc: + merged_pdf.insert_pdf(pdf_doc) + + merged_pdf.save(output_path) + finally: + merged_pdf.close() \ No newline at end of file diff --git a/src/core/splitter.py b/src/core/splitter.py new file mode 100644 index 0000000..b0b22dc --- /dev/null +++ b/src/core/splitter.py @@ -0,0 +1,22 @@ +import fitz +import os + +class PDFSplitter: + @staticmethod + def split_pdf(pdf_path, output_dir): + """ + 拆分PDF文件 + :param pdf_path: PDF文件路径 + :param output_dir: 输出目录 + """ + doc = fitz.open(pdf_path) + + try: + for page_num in range(len(doc)): + output_pdf = fitz.open() + output_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num) + output_path = os.path.join(output_dir, f'page_{page_num + 1}.pdf') + output_pdf.save(output_path) + output_pdf.close() + finally: + doc.close() \ No newline at end of file diff --git a/src/core/utils.py b/src/core/utils.py new file mode 100644 index 0000000..43285c3 --- /dev/null +++ b/src/core/utils.py @@ -0,0 +1,11 @@ +import os + +def ensure_dir(directory): + """确保目录存在,如果不存在则创建""" + if not os.path.exists(directory): + os.makedirs(directory) + +def get_output_path(input_path, new_ext): + """根据输入文件路径生成输出文件路径""" + base = os.path.splitext(input_path)[0] + return f"{base}.{new_ext}" \ No newline at end of file diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..386ea7f --- /dev/null +++ b/src/main.py @@ -0,0 +1,14 @@ +import sys +from PyQt6.QtWidgets import QApplication +from ui.main_window import MainWindow +from ui.styles import MAIN_STYLE + +def main(): + app = QApplication(sys.argv) + app.setStyleSheet(MAIN_STYLE) + window = MainWindow() + window.show() + sys.exit(app.exec()) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/resources/create_icon.py b/src/resources/create_icon.py new file mode 100644 index 0000000..a9a513e --- /dev/null +++ b/src/resources/create_icon.py @@ -0,0 +1,58 @@ +from PIL import Image, ImageDraw, ImageFont +import os + +def create_app_icon(): + # 创建一个512x512的图像(ICO推荐尺寸) + size = 512 + image = Image.new('RGBA', (size, size), (0, 0, 0, 0)) + draw = ImageDraw.Draw(image) + + # 绘制圆形背景 + background_color = (52, 152, 219) # 使用漂亮的蓝色 + draw.ellipse([10, 10, size-10, size-10], fill=background_color) + + # 绘制PDF文字 + text_color = (255, 255, 255) + font_size = size // 4 + try: + # 尝试使用Arial字体,如果系统没有则使用默认字体 + font = ImageFont.truetype("arial.ttf", font_size) + except: + font = ImageFont.load_default() + + text = "PDF" + # 获取文字大小以居中显示 + text_bbox = draw.textbbox((0, 0), text, font=font) + text_width = text_bbox[2] - text_bbox[0] + text_height = text_bbox[3] - text_bbox[1] + + x = (size - text_width) // 2 + y = (size - text_height) // 2 + draw.text((x, y), text, font=font, fill=text_color) + + # 保存不同尺寸的图标 + icon_sizes = [(16, 16), (32, 32), (48, 48), (64, 64), (128, 128), (256, 256)] + icon_images = [] + + for icon_size in icon_sizes: + resized_image = image.resize(icon_size, Image.Resampling.LANCZOS) + icon_images.append(resized_image) + + # 确保resources/icons目录存在 + icons_dir = os.path.dirname(os.path.abspath(__file__)) + icons_path = os.path.join(icons_dir, 'icons') + os.makedirs(icons_path, exist_ok=True) + + # 保存ICO文件 + icon_path = os.path.join(icons_path, 'app.ico') + icon_images[0].save( + icon_path, + format='ICO', + sizes=icon_sizes, + append_images=icon_images[1:] + ) + + print(f"图标已创建: {icon_path}") + +if __name__ == "__main__": + create_app_icon() \ No newline at end of file diff --git a/src/resources/icons/app.ico b/src/resources/icons/app.ico new file mode 100644 index 0000000000000000000000000000000000000000..3435ab693d3855f80e95d84f74fec5ff8823591d GIT binary patch literal 741 zcmVJd3oZ(OPzb_NFj^!;8X2_9 zb*+XZY}2Ndw5xS3vPJE4)kYx+anVAYL5$EQEfD(?g-qu;@6DTgPu%w$k%;c*9qzl| zIp6utcL@9E<||PX!(;$Z-;4EY)|f%DrdPN4lOnEV&20<0}_Ak$z{xK*SnLL<;{nB4S7>Qpq&e1MhOF&^|-`W1_uNpXOA$ zIbW~t8gdl=b6gUY$|@6hAASeuW$$EpfRpLG@hjGWBT4`wuKOa=!Loro@CJ zj(}@%YGntTo?ktmg(