修改了index.html以更新标题和描述,增强了用户界面,添加了导出为Word文档的功能。更新了README.md以反映新功能,增加了使用方法和功能特点的描述。同时,调整了CSS样式以支持新的导出选项,确保用户体验流畅。修复了LICENSE文件的格式问题。
552 lines
17 KiB
JavaScript
552 lines
17 KiB
JavaScript
// 导入依赖库
|
||
import * as pdfjsLib from './pdf.mjs';
|
||
import JSZip from './jszip.min.js';
|
||
import * as docx from './docx.esm.js';
|
||
|
||
// Tell PDF.js where to load its worker script from.
// NOTE(review): this is a plain relative string, while the imports above are
// module-relative ('./pdf.mjs') — confirm it resolves correctly from the page.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'cssjs/js/pdf.worker.mjs';
|
||
|
||
// Module-level state shared by the handlers below.
let pdfDocument = null; // document object resolved by pdfjsLib.getDocument() for the current file
let pdfFile = null; // File picked or dropped by the user
let renderedPages = []; // one { pageNumber, canvas, width, height } record per rendered preview page
|
||
|
||
// DOM elements: upload / loading / preview containers.
const uploadArea = document.getElementById('upload-area');
const pdfFileInput = document.getElementById('pdf-file-input');
const selectFileBtn = document.getElementById('select-file-btn');
const loadingContainer = document.getElementById('loading-container');
const pdfInfoContainer = document.getElementById('pdf-info-container');
const previewContainer = document.getElementById('preview-container');
const previewItems = document.getElementById('preview-items');

// DOM elements: export controls.
const exportBtn = document.getElementById('export-btn');
const combinePagesSwitch = document.getElementById('combine-pages-switch');
const highQualitySwitch = document.getElementById('high-quality-switch');
const preserveLayoutSwitch = document.getElementById('preserve-layout-switch');
const exportImageRadio = document.getElementById('export-image');
const exportWordRadio = document.getElementById('export-word');
const imageOptions = document.getElementById('image-options');
const wordOptions = document.getElementById('word-options');

// DOM elements: PDF information display (name, size, page count).
const pdfNameValue = document.getElementById('pdf-name-value');
const pdfSizeValue = document.getElementById('pdf-size-value');
const pdfPagesValue = document.getElementById('pdf-pages-value');
|
||
|
||
// Wire up the UI once the DOM is available.
// If this module is evaluated after DOMContentLoaded has already fired (for
// example when loaded through a dynamic import), the event would never arrive,
// so initialise immediately in that case.
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', () => initEventListeners(), { once: true });
} else {
  initEventListeners();
}
|
||
|
||
/**
 * Registers all UI event listeners: file picker, export button,
 * export-type radio buttons and drag-and-drop on the upload area.
 */
function initEventListeners() {
  selectFileBtn.addEventListener('click', () => pdfFileInput.click());
  pdfFileInput.addEventListener('change', handleFileSelect);
  exportBtn.addEventListener('click', handleExport);

  // Switching the export type toggles which option panel is visible.
  exportImageRadio.addEventListener('change', updateExportOptions);
  exportWordRadio.addEventListener('change', updateExportOptions);

  // Drag and drop.
  uploadArea.addEventListener('dragover', (e) => {
    // preventDefault is required, otherwise the browser never fires "drop".
    e.preventDefault();
    uploadArea.classList.add('dragover');
  });

  uploadArea.addEventListener('dragleave', () => {
    uploadArea.classList.remove('dragover');
  });

  uploadArea.addEventListener('drop', (e) => {
    e.preventDefault();
    uploadArea.classList.remove('dragover');

    const droppedFiles = e.dataTransfer.files;
    if (droppedFiles.length === 0) {
      return;
    }

    // Only the first dropped file is inspected.
    if (droppedFiles[0].type !== 'application/pdf') {
      showError('请选择PDF文件');
      return;
    }

    // Mirror the drop into the file input so handleFileSelect reads it from one place.
    pdfFileInput.files = droppedFiles;
    handleFileSelect(e);
  });
}
|
||
|
||
/**
 * Shows the option panel matching the selected export type and hides the other:
 * image options when the "image" radio is checked, Word options otherwise.
 */
function updateExportOptions() {
  const showImageOptions = exportImageRadio.checked;
  imageOptions.style.display = showImageOptions ? 'block' : 'none';
  wordOptions.style.display = showImageOptions ? 'none' : 'block';
}
|
||
|
||
/**
 * Click handler for the export button.
 * Shows an error when no PDF is loaded or no page has been rendered; otherwise
 * dispatches to the image or Word exporter depending on the selected radio button.
 */
function handleExport() {
  const hasContent = Boolean(pdfDocument) && renderedPages.length > 0;
  if (!hasContent) {
    showError('没有可导出的内容');
    return;
  }

  if (exportImageRadio.checked) {
    exportImages();
  } else {
    exportWord();
  }
}
|
||
|
||
/**
 * Validates the file currently held by the file input and starts loading it.
 *
 * The file must report the MIME type "application/pdf" and be at most 20 MB.
 * On success the UI switches to the loading state, any previous preview is
 * cleared, and the file bytes are handed to loadPdfFromData().
 *
 * @param {Event} e - The triggering change/drop event (unused; the file is
 *   always read from the file input).
 */
function handleFileSelect(e) {
  const maxFileSizeBytes = 20 * 1024 * 1024; // 20MB

  const file = pdfFileInput.files[0];
  if (!file) return;

  if (file.type !== 'application/pdf') {
    showError('请选择PDF文件');
    return;
  }

  if (file.size > maxFileSizeBytes) {
    showError('文件大小不能超过20MB');
    return;
  }

  pdfFile = file;

  // Switch to the loading state and drop everything belonging to a previous file.
  uploadArea.style.display = 'none';
  loadingContainer.style.display = 'block';
  previewContainer.style.display = 'none';
  pdfInfoContainer.style.display = 'none';
  previewItems.innerHTML = '';
  renderedPages = [];

  // Read the file into memory and hand the bytes to PDF.js.
  const reader = new FileReader();
  reader.onload = (event) => {
    loadPdfFromData(new Uint8Array(event.target.result));
  };
  // Without this handler a failed read would leave the spinner up forever.
  reader.onerror = () => {
    console.error('PDF读取错误:', reader.error);
    showError('无法读取文件');
    resetUI();
  };
  reader.readAsArrayBuffer(file);
}
|
||
|
||
/**
 * Opens a PDF from raw bytes with PDF.js, fills in the file-information panel
 * and starts rendering the page previews.
 *
 * Any failure is logged, reported to the user, and the UI is reset to the
 * upload state; the returned promise therefore never rejects.
 *
 * @param {Uint8Array} data - Raw bytes of the PDF file.
 * @returns {Promise<void>} Settles once loading has succeeded or failed.
 */
async function loadPdfFromData(data) {
  try {
    const pdf = await pdfjsLib.getDocument({ data }).promise;
    pdfDocument = pdf;

    // Update the PDF information panel.
    pdfNameValue.textContent = pdfFile.name;
    pdfSizeValue.textContent = formatFileSize(pdfFile.size);
    pdfPagesValue.textContent = pdf.numPages;

    // Kick off preview rendering; it reports its own progress in the UI.
    renderPdfPreview(pdf);
  } catch (error) {
    console.error('PDF加载错误:', error);
    showError('无法加载PDF文件,请确保文件未损坏');
    resetUI();
  }
}
|
||
|
||
/**
 * Renders every page of `pdf` to an off-screen canvas at preview scale, stores
 * the results in `renderedPages` (indexed by page number - 1) and calls
 * displayPreviews() once all pages are done.
 *
 * If `renderedPages` is replaced while rendering is in flight (another file was
 * selected), the results of this run are discarded instead of leaking into the
 * new file's preview. Rendering failures are reported to the user and the UI
 * is reset, so the returned promise never rejects.
 *
 * @param {object} pdf - Loaded PDF.js document (needs `numPages` and `getPage`).
 * @returns {Promise<void>} Settles when rendering has finished or failed.
 */
function renderPdfPreview(pdf) {
  const previewScale = 0.5; // preview zoom factor
  const totalPages = pdf.numPages;
  const loadingText = document.getElementById('loading-text');
  // Identity snapshot: handleFileSelect assigns a fresh array for every new
  // file, so a changed identity means this run has become stale.
  const targetPages = renderedPages;
  const isStale = () => targetPages !== renderedPages;
  let renderedCount = 0;

  loadingText.textContent = `正在渲染预览 (0/${totalPages})`;

  const renderPage = async (pageNumber) => {
    const page = await pdf.getPage(pageNumber);
    const viewport = page.getViewport({ scale: previewScale });

    const canvas = document.createElement('canvas');
    const context = canvas.getContext('2d');
    canvas.width = viewport.width;
    canvas.height = viewport.height;

    await page.render({ canvasContext: context, viewport }).promise;
    if (isStale()) return;

    renderedCount++;
    loadingText.textContent = `正在渲染预览 (${renderedCount}/${totalPages})`;

    // Pages finish in arbitrary order; storing by index keeps them in page order.
    targetPages[pageNumber - 1] = {
      pageNumber,
      canvas,
      width: viewport.width,
      height: viewport.height,
    };
  };

  const pageNumbers = Array.from({ length: totalPages }, (_, i) => i + 1);
  return Promise.all(pageNumbers.map(renderPage))
    .then(() => {
      if (!isStale()) {
        displayPreviews();
      }
    })
    .catch((error) => {
      if (isStale()) return;
      console.error('PDF渲染错误:', error);
      showError('渲染PDF预览时出错');
      resetUI();
    });
}
|
||
|
||
/**
 * Rebuilds the preview grid from `renderedPages` (sorted by page number) and
 * switches the UI from the loading state to the info + preview panels.
 */
function displayPreviews() {
  // Sort by page number (in place).
  renderedPages.sort((a, b) => a.pageNumber - b.pageNumber);

  // Clear whatever was shown before.
  previewItems.innerHTML = '';

  for (const page of renderedPages) {
    const column = document.createElement('div');
    column.className = 'col-md-6 col-lg-4 preview-item';

    const label = document.createElement('div');
    label.className = 'page-number';
    label.textContent = `第 ${page.pageNumber} 页`;

    // Draw a copy so the stored canvas itself is never attached to the DOM.
    const displayCanvas = document.createElement('canvas');
    displayCanvas.width = page.canvas.width;
    displayCanvas.height = page.canvas.height;
    displayCanvas.getContext('2d').drawImage(page.canvas, 0, 0);

    column.appendChild(displayCanvas);
    column.appendChild(label);
    previewItems.appendChild(column);
  }

  // Show the preview and the control panel.
  loadingContainer.style.display = 'none';
  pdfInfoContainer.style.display = 'block';
  previewContainer.style.display = 'block';
}
|
||
|
||
/**
 * Starts an image export using the current option switches.
 * The "high quality" switch selects render scale 2 instead of 1; the
 * "combine pages" switch selects one stacked image instead of a zip of pages.
 */
function exportImages() {
  const scale = highQualitySwitch.checked ? 2.0 : 1.0;

  // Show the loading state.
  loadingContainer.style.display = 'block';
  document.getElementById('loading-text').textContent = '正在准备导出...';

  if (combinePagesSwitch.checked) {
    exportCombinedImage(scale);
  } else {
    exportMultipleImages(scale);
  }
}
|
||
|
||
/**
 * Re-renders every previewed page at `scale`, stacks the pages vertically
 * (each horizontally centred) on one white canvas and saves it as a PNG.
 *
 * The combined canvas is sized from the pages' actual viewports at `scale`.
 * Failures (rendering errors, or a canvas the browser cannot encode) are
 * reported to the user; the loading indicator is always hidden at the end.
 *
 * @param {number} scale - PDF.js viewport scale used for the export.
 * @returns {Promise<void>} Settles when the export has finished or failed.
 */
async function exportCombinedImage(scale) {
  const loadingText = document.getElementById('loading-text');

  try {
    // First pass: collect the viewports to know the combined image size.
    const layouts = [];
    let totalHeight = 0;
    let maxWidth = 0;
    for (const { pageNumber } of renderedPages) {
      const pdfPage = await pdfDocument.getPage(pageNumber);
      const viewport = pdfPage.getViewport({ scale });
      layouts.push({ pdfPage, viewport });
      totalHeight += viewport.height;
      maxWidth = Math.max(maxWidth, viewport.width);
    }

    const combinedCanvas = document.createElement('canvas');
    // Round up so a fractional viewport size never clips the last row/column.
    combinedCanvas.width = Math.ceil(maxWidth);
    combinedCanvas.height = Math.ceil(totalHeight);
    const combinedContext = combinedCanvas.getContext('2d');

    // White background (PDF pages may be transparent).
    combinedContext.fillStyle = '#FFFFFF';
    combinedContext.fillRect(0, 0, combinedCanvas.width, combinedCanvas.height);

    // Second pass: render the pages one after another, top to bottom.
    let currentY = 0;
    for (const [index, { pdfPage, viewport }] of layouts.entries()) {
      loadingText.textContent = `正在合并页面 (${index + 1}/${layouts.length})`;

      const tempCanvas = document.createElement('canvas');
      tempCanvas.width = viewport.width;
      tempCanvas.height = viewport.height;
      const tempContext = tempCanvas.getContext('2d');

      await pdfPage.render({ canvasContext: tempContext, viewport }).promise;

      const x = (maxWidth - viewport.width) / 2; // centre horizontally
      combinedContext.drawImage(tempCanvas, x, currentY);
      currentY += viewport.height;
    }

    const blob = await new Promise((resolve) => {
      combinedCanvas.toBlob(resolve, 'image/png');
    });
    // toBlob passes null when the image cannot be created.
    if (!blob) {
      throw new Error('Combined canvas could not be encoded as PNG');
    }

    const baseName = pdfFile.name.replace(/\.pdf$/i, '');
    saveAs(blob, `${baseName}_combined.png`);
  } catch (error) {
    console.error('图片导出错误:', error);
    showError('导出图片时出错');
  } finally {
    loadingContainer.style.display = 'none';
  }
}
|
||
|
||
/**
 * Re-renders every previewed page at `scale` as its own PNG, bundles the images
 * into a zip (folder "images", files "page_<n>.png") and saves the archive.
 *
 * Pages are rendered one at a time so that only a single full-size canvas is
 * alive at any moment. Failures are reported to the user; the loading
 * indicator is always hidden at the end.
 *
 * @param {number} scale - PDF.js viewport scale used for the export.
 * @returns {Promise<void>} Settles when the export has finished or failed.
 */
async function exportMultipleImages(scale) {
  const loadingText = document.getElementById('loading-text');
  const totalPages = renderedPages.length;

  try {
    const zip = new JSZip();
    const folder = zip.folder('images');
    let processedCount = 0;

    loadingText.textContent = `正在导出图片 (0/${totalPages})`;

    for (const { pageNumber } of renderedPages) {
      const pdfPage = await pdfDocument.getPage(pageNumber);
      const viewport = pdfPage.getViewport({ scale });

      const canvas = document.createElement('canvas');
      canvas.width = viewport.width;
      canvas.height = viewport.height;
      const context = canvas.getContext('2d');

      // White background (PDF pages may be transparent).
      context.fillStyle = '#FFFFFF';
      context.fillRect(0, 0, canvas.width, canvas.height);

      await pdfPage.render({ canvasContext: context, viewport }).promise;

      const blob = await new Promise((resolve) => {
        canvas.toBlob(resolve, 'image/png');
      });
      // toBlob passes null when the image cannot be created.
      if (!blob) {
        throw new Error(`Page ${pageNumber} could not be encoded as PNG`);
      }

      folder.file(`page_${pageNumber}.png`, blob);

      processedCount++;
      loadingText.textContent = `正在导出图片 (${processedCount}/${totalPages})`;
    }

    const content = await zip.generateAsync({ type: 'blob' });
    const baseName = pdfFile.name.replace(/\.pdf$/i, '');
    saveAs(content, `${baseName}_images.zip`);
  } catch (error) {
    console.error('图片导出错误:', error);
    showError('导出图片时出错');
  } finally {
    loadingContainer.style.display = 'none';
  }
}
|
||
|
||
// Helper functions

/**
 * Formats a byte count for display: plain bytes below 1 KiB, otherwise KB or
 * MB (1024-based) with two decimals.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example "512 B", "1.50 KB" or "20.00 MB".
 */
function formatFileSize(bytes) {
  const bytesPerKb = 1024;
  const bytesPerMb = 1048576;

  if (bytes < bytesPerKb) {
    return bytes + ' B';
  }
  if (bytes < bytesPerMb) {
    return (bytes / bytesPerKb).toFixed(2) + ' KB';
  }
  return (bytes / bytesPerMb).toFixed(2) + ' MB';
}
|
||
|
||
/**
 * Reports an error to the user with a blocking alert dialog.
 *
 * @param {string} message - Text to display.
 */
function showError(message) {
  alert(message);
}
|
||
|
||
/**
 * Returns the UI to its initial state: upload area visible, every other panel
 * hidden, and the file input cleared so the same file can be chosen again.
 */
function resetUI() {
  uploadArea.style.display = 'block';

  for (const panel of [loadingContainer, pdfInfoContainer, previewContainer]) {
    panel.style.display = 'none';
  }

  pdfFileInput.value = '';
}
|
||
|
||
/**
 * Exports the loaded PDF as a Word (.docx) document, one section per page.
 *
 * With the "preserve layout" switch on, each page is rendered to a PNG and
 * embedded as an image followed by a centred page caption. Otherwise the
 * page's text is extracted and written as a heading plus one paragraph per
 * text line.
 *
 * Failures are logged and reported to the user; the loading indicator is
 * always hidden at the end.
 *
 * @returns {Promise<void>} Settles when the export has finished or failed.
 */
async function exportWord() {
  const loadingText = document.getElementById('loading-text');

  // Show the loading state.
  loadingContainer.style.display = 'block';
  loadingText.textContent = '正在准备导出Word文档...';

  try {
    const { Document, Paragraph, ImageRun, HeadingLevel, AlignmentType, Packer } = docx;
    const preserveLayout = preserveLayoutSwitch.checked;
    const totalPages = renderedPages.length;
    // Sections are collected first and passed to the Document constructor.
    const sections = [];

    for (const [index, { pageNumber }] of renderedPages.entries()) {
      loadingText.textContent = `正在处理第 ${index + 1}/${totalPages} 页`;

      const pdfPage = await pdfDocument.getPage(pageNumber);
      let children;

      if (preserveLayout) {
        const viewport = pdfPage.getViewport({ scale: 1.5 });
        const imageBytes = await renderPageToPngBytes(pdfPage, viewport);

        children = [
          new Paragraph({
            children: [
              new ImageRun({
                type: 'png',
                // A plain Uint8Array: Node's Buffer does not exist in browsers.
                data: imageBytes,
                transformation: {
                  width: 600,
                  height: 600 * (viewport.height / viewport.width), // keep aspect ratio
                },
              }),
            ],
          }),
          new Paragraph({
            text: `第 ${pageNumber} 页`,
            alignment: AlignmentType.CENTER,
          }),
        ];
      } else {
        const textContent = await pdfPage.getTextContent();
        const lines = extractTextLines(textContent.items);

        children = [
          new Paragraph({
            text: `第 ${pageNumber} 页`,
            heading: HeadingLevel.HEADING_1,
            alignment: AlignmentType.CENTER,
          }),
          // One paragraph per line: a "\n" inside a single paragraph's text
          // is not shown as a line break in Word.
          ...lines.map((line) => new Paragraph({ text: line })),
        ];
      }

      sections.push({ properties: {}, children });
    }

    // Generate the Word document.
    loadingText.textContent = '正在生成Word文档...';
    const blob = await Packer.toBlob(new Document({ sections }));

    const baseName = pdfFile.name.replace(/\.pdf$/i, '');
    saveAs(blob, `${baseName}.docx`);
  } catch (error) {
    console.error('Word导出错误:', error);
    showError('导出Word文档时出错');
  } finally {
    loadingContainer.style.display = 'none';
  }
}

// Renders one PDF page onto a white canvas and returns the PNG file bytes.
async function renderPageToPngBytes(pdfPage, viewport) {
  const canvas = document.createElement('canvas');
  canvas.width = viewport.width;
  canvas.height = viewport.height;
  const context = canvas.getContext('2d');

  context.fillStyle = '#FFFFFF';
  context.fillRect(0, 0, canvas.width, canvas.height);

  await pdfPage.render({ canvasContext: context, viewport }).promise;

  const blob = await new Promise((resolve) => {
    canvas.toBlob(resolve, 'image/png');
  });
  if (!blob) {
    throw new Error('Page canvas could not be encoded as PNG');
  }
  return new Uint8Array(await blob.arrayBuffer());
}

// Groups text items into lines: a new line starts whenever the item's Y
// position (transform[5]) differs from the previous item's.
function extractTextLines(items) {
  const lines = [];
  let currentLine = '';
  let lastY = null;

  for (const item of items) {
    const y = item.transform[5];
    if (lastY !== null && lastY !== y) {
      lines.push(currentLine);
      currentLine = '';
    }
    currentLine += item.str;
    lastY = y;
  }

  lines.push(currentLine);
  return lines;
}