1
0
Files
WordPress-mShots-Proxy/server.js
Snowz 27b801ea3d fix: 修复未处理的Promise拒绝和文件流错误导致的进程崩溃
- 添加全局异常捕获(uncaughtException, unhandledRejection)防止进程意外退出
- 修复请求合并逻辑中Promise链处理不当导致的Unhandled Promise Rejection
- 为文件读取流添加错误监听,防止文件系统异常导致进程崩溃
- 更新README.md文档以反映稳定性修复
- 将*.log添加到.gitignore忽略日志文件
2026-01-25 03:09:00 +08:00

603 lines
19 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const express = require('express');
const axios = require('axios');
const fs = require('fs');
const fsPromises = require('fs').promises;
const path = require('path');
const crypto = require('crypto');
const net = require('net');
// Global error traps: log uncaught exceptions and unhandled promise rejections
// instead of letting them terminate the process.
// NOTE(review): Node.js documentation advises against resuming normal operation
// after 'uncaughtException' because application state may be inconsistent —
// confirm that keeping the process alive here is an intentional trade-off.
process.on('uncaughtException', (err) => {
console.error('[FATAL] Uncaught Exception:', err);
});
// `promise` (the rejected promise) is received but not used; only the reason is logged.
process.on('unhandledRejection', (reason, promise) => {
console.error('[FATAL] Unhandled Rejection:', reason);
});
// Run dotenv's config() first so the settings below can read values from process.env.
require('dotenv').config();
const app = express();
// Listening port: PORT from the environment, or 11489 when it is unset, non-numeric or 0.
const PORT = Number(process.env.PORT) || 11489;
// Primary screenshot service (requests go to `${UPSTREAM_HOST}/mshots/v1/<target>`).
const UPSTREAM_HOST = 'https://s0.wp.com';
// Backup screenshot service; the target URL is appended after these path options.
const FALLBACK_HOST_BASE = 'https://image.thum.io/get/width/1024/crop/768/noanimate';
// On-disk cache lives in ./cache relative to the working directory and is created at startup.
const CACHE_DIR = path.join(process.cwd(), 'cache');
fs.mkdirSync(CACHE_DIR, { recursive: true });
// Request coalescing map (deduplication): cache key -> in-flight fetch promise.
const pendingRequests = new Map();
/**
 * Compute the SHA-1 digest of a value as a lowercase hex string.
 * @param {string} input - Text to hash.
 * @returns {string} 40-character hexadecimal digest.
 */
function sha1(input) {
  const hasher = crypto.createHash('sha1');
  hasher.update(input);
  return hasher.digest('hex');
}
/**
 * Build the on-disk locations of a cache entry.
 * @param {string} key - Cache key (used as the file base name).
 * @returns {{data: string, meta: string}} Paths of the payload file (`.data`)
 *   and of its JSON metadata file (`.json`), both inside CACHE_DIR.
 */
function getCachePaths(key) {
  const inCacheDir = (extension) => path.join(CACHE_DIR, `${key}.${extension}`);
  const data = inCacheDir('data');
  const meta = inCacheDir('json');
  return { data, meta };
}
/**
 * Decide whether an HTTP response carries a usable image.
 *
 * A response qualifies when its status is 200, its content type starts with
 * `image/`, and it has a positive length (the Content-Length header when
 * present, otherwise the measured body size).
 *
 * @param {number} status - HTTP status code.
 * @param {object} headers - Response headers with lower-case keys.
 * @param {any} data - Response body (array, Buffer/ArrayBuffer-like, or nullish).
 * @returns {boolean} True when the response is an acceptable image.
 */
function isValidImageResponse(status, headers, data) {
  const contentType = (headers['content-type'] || '').toLowerCase();
  const declaredLength = headers['content-length'];

  let bodyLength;
  if (Array.isArray(data)) {
    bodyLength = data.length;
  } else {
    bodyLength = data?.byteLength || 0;
  }

  // Small GIFs are usually the mShots "Generating" placeholder (about 9 KB).
  // Rejecting them keeps them out of the cache so a later request can retry
  // for the real screenshot.
  if (contentType.includes('image/gif') && bodyLength < 15000) {
    return false;
  }

  if (status !== 200) {
    return false;
  }
  if (!contentType.startsWith('image/')) {
    return false;
  }
  return declaredLength ? parseInt(declaredLength, 10) > 0 : bodyLength > 0;
}
/**
 * Probe whether a host accepts TCP connections on port 443 (a cheap
 * "does this site speak HTTPS" heuristic).
 *
 * The promise never rejects: every failure mode (missing host, connection
 * error, timeout, synchronous failure while opening the socket) resolves to
 * `false`.
 *
 * @param {string} host - Host name or IP address to probe.
 * @returns {Promise<boolean>} True when the connection was established.
 */
function checkPort443(host) {
  return new Promise((resolve) => {
    // net.connect() falls back to "localhost" when the host is empty, which
    // would make the proxy probe itself; treat a missing host as "no HTTPS".
    if (typeof host !== 'string' || host.trim() === '') {
      resolve(false);
      return;
    }

    let socket;
    try {
      socket = net.connect(443, host);
    } catch (err) {
      // Keep the "never rejects" contract even if the socket cannot be created.
      resolve(false);
      return;
    }

    // 1.5 second budget so protocol detection does not stall the request.
    socket.setTimeout(1500);
    socket.on('connect', () => {
      socket.end();
      resolve(true);
    });
    socket.on('error', () => {
      socket.destroy();
      resolve(false);
    });
    socket.on('timeout', () => {
      // A timeout only notifies; the socket has to be torn down manually.
      socket.destroy();
      resolve(false);
    });
  });
}
/**
 * Turn the request path into the absolute URL of the page to capture,
 * adding a scheme when the caller omitted it.
 *
 * - A path that already starts with `http://` or `https://` (in any letter
 *   case) is returned as-is apart from the scheme being lower-cased.
 * - Otherwise the host is extracted and probed on port 443; the result is
 *   prefixed with `https://` when the probe succeeds and `http://` when not.
 *
 * @param {string} rawPath - Request path, with or without a leading slash.
 * @returns {Promise<string>} Absolute target URL.
 */
async function resolveTargetUrl(rawPath) {
  // Drop the single leading slash that separates the target from the route.
  const target = rawPath.startsWith('/') ? rawPath.slice(1) : rawPath;

  // URL schemes are case-insensitive, so "HTTPS://example.com" must not get
  // a second scheme prepended.
  if (/^https?:\/\//i.test(target)) {
    return target.replace(/^https?/i, (scheme) => scheme.toLowerCase());
  }

  // Authority = everything before the first path, query or fragment delimiter.
  let authority = target.split(/[/?#]/, 1)[0];
  // Strip optional "user:password@" credentials.
  authority = authority.slice(authority.lastIndexOf('@') + 1);

  let host;
  if (authority.startsWith('[')) {
    // Bracketed IPv6 literal, e.g. "[::1]:8443": the colons belong to the address.
    const closing = authority.indexOf(']');
    host = closing === -1 ? authority.slice(1) : authority.slice(1, closing);
  } else {
    // Remove the port number, if any.
    host = authority.split(':', 1)[0];
  }

  // Without a host there is nothing to probe; default to plain HTTP.
  const isHttps = host ? await checkPort443(host) : false;
  if (!isHttps) {
    console.log(`[protocol-detect] ${host} : 443 port closed or timeout, falling back to HTTP.`);
  }
  return isHttps ? `https://${target}` : `http://${target}`;
}
/**
 * GET a URL from the mShots upstream, retrying on transport-level failures.
 *
 * Non-2xx responses are NOT treated as failures: they are returned so the
 * caller can inspect them. Only thrown errors (timeouts, connection errors)
 * trigger a retry.
 *
 * @param {string} upstreamUrl - Full upstream URL to request.
 * @param {number} [tries=2] - Maximum number of attempts; must be at least 1.
 * @returns {Promise<any>} The axios response (body as a binary buffer).
 * @throws {RangeError} When `tries` is not a number >= 1.
 * @throws {Error} The last transport error once all attempts have failed.
 */
async function fetchUpstreamWithRetry(upstreamUrl, tries = 2) {
  // With zero attempts the function used to fall through and return undefined,
  // which only surfaced later as a confusing TypeError in the caller.
  if (!(tries >= 1)) {
    throw new RangeError(`tries must be >= 1, got ${tries}`);
  }

  let lastErr = null;
  for (let i = 0; i < tries; i++) {
    try {
      return await axios.get(upstreamUrl, {
        responseType: 'arraybuffer',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36',
          'Accept': 'image/avif,image/webp,image/apng,image/*;q=0.8,*/*;q=0.5',
          'Host': 's0.wp.com',
        },
        maxRedirects: 5,
        timeout: 20000,
        // Ignore proxy settings coming from environment variables.
        proxy: false,
        // Hand every status code back to the caller instead of throwing.
        validateStatus: () => true,
      });
    } catch (err) {
      lastErr = err;
      console.error(`[upstream-error] try=${i + 1} url=${upstreamUrl} msg=${err.message}`);
      // Simple fixed back-off, skipped after the final attempt so the caller
      // is not delayed for a retry that will never happen.
      if (i + 1 < tries) {
        await new Promise((r) => setTimeout(r, 300));
      }
    }
  }
  throw lastErr;
}
/**
 * Request a screenshot from the backup service (thum.io).
 *
 * Despite the name, a single attempt is made. Transport errors are logged and
 * reported as `null`; any HTTP status is returned to the caller unchanged.
 *
 * @param {string} targetUrl - Absolute URL of the page to capture.
 * @returns {Promise<any>} The axios response, or null when the request threw.
 */
async function fetchFallbackWithRetry(targetUrl) {
  // thum.io URL layout: https://image.thum.io/get/<options>/<url>
  const fallbackUrl = `${FALLBACK_HOST_BASE}/${targetUrl}`;
  console.log(`[fallback-request] trying fallback: ${fallbackUrl}`);

  const requestOptions = {
    responseType: 'arraybuffer',
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    },
    timeout: 30000,
    validateStatus: () => true,
  };

  let response = null;
  try {
    response = await axios.get(fallbackUrl, requestOptions);
  } catch (err) {
    console.error(`[fallback-error] msg=${err.message}`);
  }
  return response;
}
/**
 * Fetch a screenshot from the upstream, fall back to the backup service when
 * the upstream answer is not a valid image, and persist valid images to the
 * disk cache.
 *
 * @param {string} upstreamUrl - Full mShots URL to request.
 * @param {string} targetUrl - Absolute URL of the page being captured (used for the fallback).
 * @param {string} key - Cache key for this request.
 * @returns {Promise<{status: number, headers: object, data: Buffer, contentType: string, isFallback: boolean}>}
 *   The response to relay. Valid images are reported with status 200; invalid
 *   responses are passed through with their original status and are not cached.
 * @throws Re-throws any error raised while contacting the upstream so the
 *   caller can fall back to a stale cache entry.
 */
async function fetchAndCache(upstreamUrl, targetUrl, key) {
  const cachePaths = getCachePaths(key);

  let chosen;
  let usedFallback = false;

  // Step 1: ask the upstream; step 2: use the backup service if that answer is unusable.
  try {
    const primary = await fetchUpstreamWithRetry(upstreamUrl);
    chosen = primary;

    const primaryOk = isValidImageResponse(primary.status, primary.headers, primary.data);
    if (!primaryOk) {
      console.log(`[upstream-invalid] url=${upstreamUrl} status=${primary.status} len=${primary.data.byteLength}, trying fallback...`);
      const secondary = await fetchFallbackWithRetry(targetUrl);
      const secondaryOk = secondary && isValidImageResponse(secondary.status, secondary.headers, secondary.data);
      if (secondaryOk) {
        console.log(`[fallback-success] url=${targetUrl}`);
        chosen = secondary;
        usedFallback = true;
      } else {
        console.log(`[fallback-failed] url=${targetUrl}, returning original response`);
      }
    }
  } catch (err) {
    console.error(`[upstream-failed] url=${upstreamUrl} err=${err.message}`);
    // The upstream failed outright: propagate so the caller can decide
    // (for example by serving an old cache entry).
    throw err;
  }

  // Step 3: anything that is not a valid image is relayed untouched and never cached.
  if (!isValidImageResponse(chosen.status, chosen.headers, chosen.data)) {
    return {
      status: chosen.status,
      headers: chosen.headers,
      data: chosen.data,
      contentType: chosen.headers['content-type'],
      isFallback: usedFallback
    };
  }

  const contentType = chosen.headers['content-type'] || 'image/jpeg';
  const meta = {
    url: usedFallback ? `fallback:${targetUrl}` : upstreamUrl,
    contentType,
    size: chosen.data.byteLength,
    createdAt: new Date().toISOString(),
    source: usedFallback ? 'thum.io' : 'mshots'
  };

  // Write to uniquely named temporary files first, then rename them into
  // place, so readers never observe a half-written cache entry.
  const tmpTag = `.${Date.now()}-${Math.random().toString(36).slice(2)}.tmp`;
  const tmpDataPath = cachePaths.data + tmpTag;
  const tmpMetaPath = cachePaths.meta + tmpTag;
  try {
    await fsPromises.writeFile(tmpDataPath, chosen.data);
    await fsPromises.writeFile(tmpMetaPath, JSON.stringify(meta));
    await fsPromises.rename(tmpDataPath, cachePaths.data);
    await fsPromises.rename(tmpMetaPath, cachePaths.meta);
  } catch (e) {
    console.error(`[cache-write-error] ${e.message}`);
    // Best-effort removal of whatever temporary files are left behind.
    for (const leftover of [tmpDataPath, tmpMetaPath]) {
      try { await fsPromises.unlink(leftover); } catch (_) {}
    }
  }

  return {
    status: 200,
    headers: chosen.headers,
    data: chosen.data,
    contentType,
    isFallback: usedFallback
  };
}
/**
 * Pipe a cached payload file into the HTTP response.
 *
 * If reading fails before any header was sent, a non-cacheable 500 is
 * returned instead of an empty "successful" image. If it fails mid-transfer
 * the connection is torn down so the client sees an aborted download rather
 * than a response that never finishes.
 *
 * @param {object} res - Express response object.
 * @param {string} filePath - Path of the cached payload file.
 */
function streamCacheFile(res, filePath) {
  const stream = fs.createReadStream(filePath);
  stream.on('error', (streamErr) => {
    console.error(`[stream-error] ${streamErr.message}`);
    if (res.headersSent) {
      // pipe() does not end the destination when the source fails.
      res.destroy();
      return;
    }
    res.set('Cache-Control', 'no-store');
    res.status(500).type('text/plain').send('Cache read error');
  });
  stream.pipe(res);
}

/**
 * Core request flow: fresh cache -> (coalesced) upstream fetch with backup
 * service -> stale cache as last resort -> 502.
 *
 * @param {object} res - Express response object.
 * @param {string} upstreamUrl - Full mShots URL; its SHA-1 is the cache key.
 * @param {string} targetUrl - Absolute URL of the page being captured.
 * @returns {Promise<any>} Resolves once a response has been started.
 */
async function handleProxyRequest(res, upstreamUrl, targetUrl) {
  const longLivedCacheControl = 'public, max-age=315360000, immutable';
  const key = sha1(upstreamUrl);
  const { data: dataPath, meta: metaPath } = getCachePaths(key);

  // 1. Serve from the disk cache when a fresh entry exists.
  try {
    // Both files must exist for the entry to be usable.
    await fsPromises.access(dataPath);
    await fsPromises.access(metaPath);
    const metaRaw = await fsPromises.readFile(metaPath, 'utf8');
    let meta;
    try {
      meta = JSON.parse(metaRaw);
    } catch (e) {
      console.warn(`[cache-warn] meta corrupted for ${upstreamUrl}`);
    }
    if (meta) {
      // Entries are considered fresh for 30 days. A missing or unparsable
      // createdAt yields NaN, which fails the comparison and counts as expired.
      const cachedTime = new Date(meta.createdAt).getTime();
      const maxAge = 30 * 24 * 3600 * 1000;
      if (Date.now() - cachedTime < maxAge) {
        console.log(`[cache-hit] ${upstreamUrl}`);
        res.type(meta.contentType);
        res.set('Cache-Control', longLivedCacheControl);
        res.set('X-Source', meta.source === 'thum.io' ? 'fallback-thum.io-cache' : 'mshots-cache');
        streamCacheFile(res, dataPath);
        return;
      }
      console.log(`[cache-expired] ${upstreamUrl}`);
    }
  } catch (err) {
    // Cache miss (or unreadable entry): continue with the upstream.
  }

  // 2. Fetch from the upstream, sharing one in-flight fetch per cache key.
  try {
    let resultPromise;
    if (pendingRequests.has(key)) {
      console.log(`[coalesce-hit] joining pending request for ${upstreamUrl}`);
      resultPromise = pendingRequests.get(key);
    } else {
      // Store the promise that already includes finally(), so every awaiter
      // observes the same chain and no rejection is left unhandled.
      resultPromise = fetchAndCache(upstreamUrl, targetUrl, key)
        .finally(() => {
          pendingRequests.delete(key);
        });
      pendingRequests.set(key, resultPromise);
    }
    const result = await resultPromise;

    res.status(result.status);
    if (result.contentType) res.type(result.contentType);
    // Only real screenshots may be cached downstream. Placeholder images and
    // error responses must stay revalidatable, otherwise browsers and CDNs
    // would pin the "generating" placeholder for years.
    const isRealImage = isValidImageResponse(result.status, result.headers || {}, result.data);
    res.set('Cache-Control', isRealImage ? longLivedCacheControl : 'no-store');
    if (result.isFallback) {
      res.set('X-Source', 'fallback-thum.io');
    }
    return res.send(result.data);
  } catch (err) {
    // 3. The upstream failed outright: serve a cached copy even if expired.
    try {
      await fsPromises.access(dataPath);
      await fsPromises.access(metaPath);
      const metaRaw = await fsPromises.readFile(metaPath, 'utf8');
      const meta = JSON.parse(metaRaw);
      if (meta.contentType && meta.contentType.toLowerCase().startsWith('image/')) {
        console.log(`[fallback-cache] using stale cache for ${upstreamUrl}`);
        res.set('Cache-Control', longLivedCacheControl);
        res.type(meta.contentType);
        streamCacheFile(res, dataPath);
        return;
      }
    } catch (_) {
      // No usable stale entry; fall through to the 502 below.
    }
    console.error(`[upstream-failed-final] url=${upstreamUrl} err=${err.message}`);
    if (!res.headersSent) {
      return res.status(502).type('text/plain').send('Upstream error');
    }
  }
}
/**
 * Escape a value for safe interpolation into HTML text or a quoted attribute.
 * @param {any} value - Value to escape (converted to a string first).
 * @returns {string} The value with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}

/**
 * Render the "snapshot loading" page shown to browsers. The page displays a
 * spinner and loads the real image from `rawUrl` in an <img> element.
 * Both values come from the request URL, so they are escaped before being
 * placed into the markup.
 * @param {string} targetUrl - URL of the page being captured (displayed as text).
 * @param {string} rawUrl - URL that returns the image itself (used as img src).
 * @returns {string} Complete HTML document.
 */
function renderLoadingPage(targetUrl, rawUrl) {
  const safeTargetUrl = escapeHtml(targetUrl);
  const safeRawUrl = escapeHtml(rawUrl);
  return `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Snapshot Loading...</title>
<style>
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background-color: #f0f2f5;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
}
.container {
text-align: center;
background: white;
padding: 2rem;
border-radius: 12px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
max-width: 90%;
}
.loader {
border: 4px solid #f3f3f3;
border-top: 4px solid #3498db;
border-radius: 50%;
width: 40px;
height: 40px;
animation: spin 1s linear infinite;
margin: 0 auto 1rem;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
img {
max-width: 100%;
height: auto;
border-radius: 8px;
display: none; /* 初始隐藏 */
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.status {
color: #666;
margin-bottom: 1rem;
}
</style>
</head>
<body>
<div class="container">
<div id="loading-state">
<div class="loader"></div>
<div class="status">Generating snapshot for<br><strong>${safeTargetUrl}</strong>...</div>
<div style="font-size: 12px; color: #999;">This may take up to 30 seconds if not cached.</div>
</div>
<img id="result-img" src="${safeRawUrl}" alt="Snapshot" onload="showImage()" onerror="showError()">
<div id="error-state" style="display:none; color: #e74c3c;">
Failed to load snapshot.
</div>
</div>
<script>
function showImage() {
document.getElementById('loading-state').style.display = 'none';
document.getElementById('result-img').style.display = 'block';
}
function showError() {
document.getElementById('loading-state').style.display = 'none';
document.getElementById('error-state').style.display = 'block';
}
</script>
</body>
</html>
`;
}

/**
 * Shared body of both routes: resolve the target, then either return the
 * loading page (browser navigation) or the image itself.
 *
 * Errors are caught here because Express 4 does not handle rejected promises
 * from async handlers; without this the request would never get a response.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {string} rawPath - Part of the request URL that names the target.
 * @returns {Promise<any>}
 */
async function serveSnapshot(req, res, rawPath) {
  try {
    // Resolve the target URL, adding the scheme when missing.
    const targetUrl = await resolveTargetUrl(rawPath);
    // Full upstream URL.
    const upstreamUrl = UPSTREAM_HOST + '/mshots/v1/' + targetUrl;

    // Browser convenience: when the client accepts text/html and did not pass
    // ?raw=true, answer with a loading page that fetches the image itself.
    const wantsHtml = req.headers.accept && req.headers.accept.includes('text/html');
    if (wantsHtml && !req.query.raw) {
      const rawUrl = req.originalUrl.includes('?')
        ? `${req.originalUrl}&raw=true`
        : `${req.originalUrl}?raw=true`;
      return res.type('text/html').send(renderLoadingPage(targetUrl, rawUrl));
    }

    return await handleProxyRequest(res, upstreamUrl, targetUrl);
  } catch (err) {
    console.error(`[request-error] url=${req.originalUrl} err=${err && err.message}`);
    if (!res.headersSent) {
      return res.status(500).type('text/plain').send('Internal Server Error');
    }
  }
}

// mShots-compatible route: /mshots/v1/<target>
app.use('/mshots/v1', (req, res) => {
  if (req.method !== 'GET') {
    return res.status(405).type('text/plain').send('Method Not Allowed');
  }
  // req.originalUrl still contains the /mshots/v1 prefix (for example
  // /mshots/v1/www.baidu.com); strip it so only the target remains.
  const prefix = '/mshots/v1';
  const pathPart = req.originalUrl.startsWith(prefix)
    ? req.originalUrl.slice(prefix.length)
    : req.originalUrl;
  return serveSnapshot(req, res, pathPart);
});

// Catch-all route: /https://example.com or /www.baidu.com
app.use((req, res) => {
  if (req.method !== 'GET') {
    return res.status(405).type('text/plain').send('Method Not Allowed');
  }
  if (req.path === '/') {
    return res.type('text/plain').send('mShots proxy is running. Try /https://www.baidu.com or /www.baidu.com');
  }
  return serveSnapshot(req, res, req.originalUrl);
});
// No dedicated health-check route is registered here: requests for "/" are
// answered by the catch-all app.use handler defined earlier in this file.
// Start the HTTP server on the configured port and log the local address.
app.listen(PORT, () => {
console.log(`Proxy running at http://localhost:${PORT}`);
});