1
0
Files
WordPress-mShots-Proxy/server.js
Snowz e9bfa3c61f feat(server): 实现请求合并和原子化缓存写入提升并发健壮性
- 新增请求合并机制,避免相同URL的高并发回源请求
- 采用原子化写入策略(临时文件+重命名)防止并发读写导致文件损坏
- 重构核心处理逻辑,将缓存操作提取为独立函数
- 优化错误处理和缓存兜底逻辑
2026-01-21 21:36:06 +08:00

587 lines
18 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const express = require('express');
const axios = require('axios');
const fs = require('fs');
const fsPromises = require('fs').promises;
const path = require('path');
const crypto = require('crypto');
const net = require('net');
require('dotenv').config();
// Express application instance that all routes below are registered on.
const app = express();
// Listening port: taken from the PORT environment variable, falling back to
// 11489 when it is unset, non-numeric, or 0.
const PORT = Number(process.env.PORT) || 11489;
// Origin that mShots request URLs are built against.
const UPSTREAM_HOST = 'https://s0.wp.com';
// Base URL (including rendering options) of the thum.io fallback service;
// the target URL is appended after a slash.
const FALLBACK_HOST_BASE = 'https://image.thum.io/get/width/1024/crop/768/noanimate';
// On-disk cache directory, located under the process's working directory.
const CACHE_DIR = path.join(process.cwd(), 'cache');
// Create the cache directory (and any missing parents) at startup.
fs.mkdirSync(CACHE_DIR, { recursive: true });
// Request coalescing (deduplication): maps a cache key to the promise of the
// fetch currently in flight for it.
const pendingRequests = new Map();
/**
 * Compute the SHA-1 digest of a string.
 * @param {string} input - Text to hash.
 * @returns {string} Hexadecimal digest.
 */
function sha1(input) {
  const hasher = crypto.createHash('sha1');
  hasher.update(input);
  return hasher.digest('hex');
}
/**
 * Build the on-disk locations of a cache entry.
 * @param {string} key - Cache key (used as the file base name).
 * @returns {{data: string, meta: string}} Paths of the image payload
 *   (`<key>.data`) and its JSON metadata (`<key>.json`) inside CACHE_DIR.
 */
function getCachePaths(key) {
  const dataFile = path.join(CACHE_DIR, `${key}.data`);
  const metaFile = path.join(CACHE_DIR, `${key}.json`);
  return { data: dataFile, meta: metaFile };
}
/**
 * Decide whether an HTTP response carries a usable image.
 * @param {number} status - HTTP status code.
 * @param {object} headers - Response headers (lower-case keys).
 * @param {any} data - Response body (array or an object exposing `byteLength`).
 * @returns {boolean} True only for a 200 response with an `image/*` content
 *   type and a positive length that is not a small GIF.
 */
function isValidImageResponse(status, headers, data) {
  const mime = (headers['content-type'] || '').toLowerCase();
  const declaredLength = headers['content-length'];

  let bodyLength;
  if (Array.isArray(data)) {
    bodyLength = data.length;
  } else {
    bodyLength = data?.byteLength || 0;
  }

  // Small GIFs are usually the mShots "Generating" placeholder (about 9 KB).
  // Rejecting them keeps them out of the cache so a later request can retry
  // for the real screenshot.
  if (mime.includes('image/gif') && bodyLength < 15000) {
    return false;
  }
  if (status !== 200) {
    return false;
  }
  if (!mime.startsWith('image/')) {
    return false;
  }
  // A Content-Length header, when present, takes precedence over the body size.
  if (declaredLength) {
    return parseInt(declaredLength, 10) > 0;
  }
  return bodyLength > 0;
}
/**
 * Probe whether a host accepts TCP connections on port 443 (a simple
 * "does it speak HTTPS" heuristic).
 * @param {string} host - Host name or address to probe.
 * @returns {Promise<boolean>} Resolves true once connected, false on a socket
 *   error or timeout. Never rejects from the socket events.
 */
function checkPort443(host) {
  return new Promise((resolve) => {
    const probe = net.connect(443, host);
    // 1.5 s socket timeout so a silent host does not block the request for long.
    probe.setTimeout(1500);

    const handlers = {
      connect() {
        probe.end();
        resolve(true);
      },
      error() {
        resolve(false);
      },
      timeout() {
        probe.destroy();
        resolve(false);
      },
    };
    for (const [eventName, handler] of Object.entries(handlers)) {
      probe.on(eventName, handler);
    }
  });
}
/**
 * Turn a request path into a target URL, adding a scheme when it is missing.
 * @param {string} rawPath - Request path such as `/www.example.com/page?x=1`
 *   or `/https://example.com`.
 * @returns {Promise<string>} The target unchanged when it already starts with
 *   `http://` or `https://`; otherwise the target prefixed with `https://`
 *   when the host answers on port 443, or `http://` when it does not.
 */
async function resolveTargetUrl(rawPath) {
  // Drop a single leading slash.
  const target = rawPath.startsWith('/') ? rawPath.slice(1) : rawPath;

  // An explicit scheme needs no detection.
  if (/^https?:\/\//.test(target)) {
    return target;
  }

  // Host = everything before the first '/', then '?', then ':' (port).
  const [authority] = target.split('/');
  const [hostWithPort] = authority.split('?');
  const [host] = hostWithPort.split(':');

  const secure = await checkPort443(host);
  if (secure) {
    return `https://${target}`;
  }
  console.log(`[protocol-detect] ${host} : 443 port closed or timeout, falling back to HTTP.`);
  return `http://${target}`;
}
/**
 * Request the upstream screenshot URL, retrying on transport-level failures.
 *
 * Any HTTP status is returned to the caller (`validateStatus` always passes);
 * only errors thrown by axios (network error, timeout, ...) trigger a retry.
 *
 * @param {string} upstreamUrl - Full upstream URL to fetch.
 * @param {number} [tries=2] - Maximum number of attempts. Values below 1 are
 *   treated as 1 so the function never resolves to `undefined`.
 * @returns {Promise<any>} The axios response, with `data` as an ArrayBuffer/Buffer.
 * @throws The error of the last failed attempt when every attempt fails.
 */
async function fetchUpstreamWithRetry(upstreamUrl, tries = 2) {
  const attempts = Math.max(1, Math.ceil(tries) || 1);
  let lastErr = null;
  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      return await axios.get(upstreamUrl, {
        responseType: 'arraybuffer',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36',
          'Accept': 'image/avif,image/webp,image/apng,image/*;q=0.8,*/*;q=0.5',
          // The Host header is pinned to the mShots origin.
          'Host': 's0.wp.com',
        },
        maxRedirects: 5,
        timeout: 20000,
        // Ignore proxy settings coming from environment variables.
        proxy: false,
        // Let non-2xx responses through so the caller can judge them.
        validateStatus: () => true,
      });
    } catch (err) {
      lastErr = err;
      console.error(`[upstream-error] try=${attempt} url=${upstreamUrl} msg=${err.message}`);
      // Simple fixed back-off, skipped after the final attempt because there
      // is nothing left to wait for.
      if (attempt < attempts) {
        await new Promise((resolve) => setTimeout(resolve, 300));
      }
    }
  }
  throw lastErr;
}
/**
 * Request the screenshot from the fallback service (thum.io).
 *
 * Despite the name, a single attempt is made.
 *
 * @param {string} targetUrl - URL of the page to capture.
 * @returns {Promise<any>} The axios response (any HTTP status), or `null`
 *   when the request itself fails.
 */
async function fetchFallbackWithRetry(targetUrl) {
  // thum.io format: https://image.thum.io/get/<options>/<url>
  const requestUrl = `${FALLBACK_HOST_BASE}/${targetUrl}`;
  console.log(`[fallback-request] trying fallback: ${requestUrl}`);

  const requestOptions = {
    responseType: 'arraybuffer',
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    },
    timeout: 30000,
    validateStatus: () => true,
  };

  let response = null;
  try {
    response = await axios.get(requestUrl, requestOptions);
  } catch (err) {
    console.error(`[fallback-error] msg=${err.message}`);
  }
  return response;
}
/**
 * Write a cache entry (payload + metadata) through temporary files that are
 * renamed into place. Failures are logged and the temporary files removed on
 * a best-effort basis; nothing is thrown to the caller.
 * @param {string} dataPath - Final path of the image payload.
 * @param {string} metaPath - Final path of the JSON metadata.
 * @param {any} body - Image bytes to store.
 * @param {object} meta - Metadata object, serialised as JSON.
 * @returns {Promise<void>}
 */
async function writeCacheEntryAtomically(dataPath, metaPath, body, meta) {
  const suffix = `.${Date.now()}-${Math.random().toString(36).slice(2)}.tmp`;
  const stagedData = dataPath + suffix;
  const stagedMeta = metaPath + suffix;
  try {
    await fsPromises.writeFile(stagedData, body);
    await fsPromises.writeFile(stagedMeta, JSON.stringify(meta));
    // Rename only after both temporary files are fully written.
    await fsPromises.rename(stagedData, dataPath);
    await fsPromises.rename(stagedMeta, metaPath);
  } catch (writeErr) {
    console.error(`[cache-write-error] ${writeErr.message}`);
    // Best-effort cleanup of whatever temporary files are left.
    for (const staged of [stagedData, stagedMeta]) {
      try { await fsPromises.unlink(staged); } catch (_) {}
    }
  }
}

/**
 * Fetch a screenshot from upstream, try the fallback service when the
 * upstream answer is not a valid image, and cache the result if it is valid.
 * @param {string} upstreamUrl - Full mShots URL.
 * @param {string} targetUrl - URL of the page being captured (used for the fallback).
 * @param {string} key - Cache key.
 * @returns {Promise<{status: number, headers: object, data: any, contentType: string, isFallback: boolean}>}
 *   The chosen response. `status` is 200 for a valid (cached) image; otherwise
 *   the original upstream response is passed through untouched.
 * @throws Re-throws any error raised while fetching, so the caller can fall
 *   back to a stale cache entry.
 */
async function fetchAndCache(upstreamUrl, targetUrl, key) {
  const paths = getCachePaths(key);

  // 1. Ask upstream first; only consult the fallback when that answer is unusable.
  let chosen;
  let isFallback = false;
  try {
    const primary = await fetchUpstreamWithRetry(upstreamUrl);
    chosen = primary;
    // 2. Validate the upstream answer.
    const primaryOk = isValidImageResponse(primary.status, primary.headers, primary.data);
    if (!primaryOk) {
      console.log(`[upstream-invalid] url=${upstreamUrl} status=${primary.status} len=${primary.data.byteLength}, trying fallback...`);
      const secondary = await fetchFallbackWithRetry(targetUrl);
      if (secondary && isValidImageResponse(secondary.status, secondary.headers, secondary.data)) {
        console.log(`[fallback-success] url=${targetUrl}`);
        chosen = secondary;
        isFallback = true;
      } else {
        console.log(`[fallback-failed] url=${targetUrl}, returning original response`);
      }
    }
  } catch (err) {
    console.error(`[upstream-failed] url=${upstreamUrl} err=${err.message}`);
    throw err;
  }

  const { status, headers, data } = chosen;

  // Invalid answers are handed back as-is and never written to the cache.
  if (!isValidImageResponse(status, headers, data)) {
    return {
      status,
      headers,
      data,
      contentType: headers['content-type'],
      isFallback,
    };
  }

  // 3. Valid image: persist it, then return it.
  const contentType = headers['content-type'] || 'image/jpeg';
  const meta = {
    url: isFallback ? `fallback:${targetUrl}` : upstreamUrl,
    contentType,
    size: data.byteLength,
    createdAt: new Date().toISOString(),
    source: isFallback ? 'thum.io' : 'mshots',
  };
  await writeCacheEntryAtomically(paths.data, paths.meta, data, meta);

  return {
    status: 200,
    headers,
    data,
    contentType,
    isFallback,
  };
}
/**
 * Stream a cached image file into the response.
 *
 * A read stream without an 'error' listener turns any read failure into an
 * uncaught exception, so the failure is answered here instead.
 * @param {object} res - Express response.
 * @param {string} dataPath - Path of the cached payload.
 * @param {string} upstreamUrl - Upstream URL, used for logging only.
 */
function streamCachedFile(res, dataPath, upstreamUrl) {
  const stream = fs.createReadStream(dataPath);
  stream.on('error', (err) => {
    console.error(`[cache-read-error] url=${upstreamUrl} err=${err.message}`);
    if (res.headersSent) {
      // Part of the body is already on the wire; all we can do is abort.
      res.destroy(err);
      return;
    }
    res.set('Cache-Control', 'no-store');
    res.status(502).type('text/plain').send('Upstream error');
  });
  stream.pipe(res);
}

/**
 * Core request flow: serve from cache -> otherwise fetch (coalescing
 * concurrent requests for the same key) -> on total failure fall back to a
 * stale cache entry -> otherwise answer 502.
 * @param {object} res - Express response.
 * @param {string} upstreamUrl - Full mShots URL; its SHA-1 is the cache key.
 * @param {string} targetUrl - URL of the page being captured.
 * @returns {Promise<any>}
 */
async function handleProxyRequest(res, upstreamUrl, targetUrl) {
  const LONG_LIVED = 'public, max-age=315360000, immutable';
  const key = sha1(upstreamUrl);
  const { data: dataPath, meta: metaPath } = getCachePaths(key);

  // 1. Try the cache.
  try {
    // Both files must exist for the entry to be usable.
    await fsPromises.access(dataPath);
    await fsPromises.access(metaPath);
    const metaRaw = await fsPromises.readFile(metaPath, 'utf8');
    let meta;
    try {
      meta = JSON.parse(metaRaw);
    } catch (e) {
      console.warn(`[cache-warn] meta corrupted for ${upstreamUrl}`);
    }
    if (meta) {
      // Entries are considered fresh for 30 days.
      const cachedTime = new Date(meta.createdAt).getTime();
      const maxAge = 30 * 24 * 3600 * 1000;
      if (Date.now() - cachedTime < maxAge) {
        console.log(`[cache-hit] ${upstreamUrl}`);
        res.type(meta.contentType);
        res.set('Cache-Control', LONG_LIVED);
        res.set('X-Source', meta.source === 'thum.io' ? 'fallback-thum.io-cache' : 'mshots-cache');
        streamCachedFile(res, dataPath, upstreamUrl);
        return;
      }
      console.log(`[cache-expired] ${upstreamUrl}`);
    }
  } catch (err) {
    // Cache miss: fall through to the upstream fetch.
  }

  // 2. Fetch from upstream, sharing one in-flight fetch per key.
  try {
    let resultPromise;
    if (pendingRequests.has(key)) {
      console.log(`[coalesce-hit] joining pending request for ${upstreamUrl}`);
      resultPromise = pendingRequests.get(key);
    } else {
      resultPromise = fetchAndCache(upstreamUrl, targetUrl, key);
      pendingRequests.set(key, resultPromise);
      // Remove the entry whether the fetch succeeds or fails. `then` with both
      // handlers is used instead of `finally`: the promise returned by
      // `finally` would reject again and, with nobody handling it, surface as
      // an unhandled rejection.
      const release = () => pendingRequests.delete(key);
      resultPromise.then(release, release);
    }
    const result = await resultPromise;

    res.status(result.status);
    if (result.contentType) res.type(result.contentType);
    // Only real images may be cached long-term by clients. Placeholder GIFs and
    // upstream errors were deliberately kept out of the server cache so that a
    // retry can succeed; the same must hold for browsers and CDNs.
    const cacheable = isValidImageResponse(result.status, result.headers || {}, result.data);
    res.set('Cache-Control', cacheable ? LONG_LIVED : 'no-store');
    if (result.isFallback) {
      res.set('X-Source', 'fallback-thum.io');
    }
    return res.send(result.data);
  } catch (err) {
    // 3. The fetch failed outright: serve a cache entry if one exists, even if expired.
    try {
      await fsPromises.access(dataPath);
      await fsPromises.access(metaPath);
      const metaRaw = await fsPromises.readFile(metaPath, 'utf8');
      const meta = JSON.parse(metaRaw);
      if (meta.contentType && meta.contentType.toLowerCase().startsWith('image/')) {
        console.log(`[fallback-cache] using stale cache for ${upstreamUrl}`);
        res.set('Cache-Control', LONG_LIVED);
        res.type(meta.contentType);
        streamCachedFile(res, dataPath, upstreamUrl);
        return;
      }
    } catch (_) {
      // No usable stale entry; report the upstream failure below.
    }
    console.error(`[upstream-failed-final] url=${upstreamUrl} err=${err.message}`);
    if (!res.headersSent) {
      res.set('Cache-Control', 'no-store');
      return res.status(502).type('text/plain').send('Upstream error');
    }
  }
}
/**
 * Escape a value for safe inclusion in HTML text or a double-quoted attribute.
 * @param {any} value - Value to escape (converted with String()).
 * @returns {string}
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Render the HTML "loading" page shown to browsers. The page displays a
 * spinner and loads the real image from `rawUrl`.
 *
 * Both values originate from the request URL, so they are HTML-escaped before
 * being placed in the markup.
 * @param {string} targetUrl - URL being captured (shown to the user).
 * @param {string} rawUrl - URL the `<img>` element loads the screenshot from.
 * @returns {string} Complete HTML document.
 */
function renderLoadingPage(targetUrl, rawUrl) {
  const safeTarget = escapeHtml(targetUrl);
  const safeRawUrl = escapeHtml(rawUrl);
  return `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Snapshot Loading...</title>
<style>
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background-color: #f0f2f5;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
}
.container {
text-align: center;
background: white;
padding: 2rem;
border-radius: 12px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
max-width: 90%;
}
.loader {
border: 4px solid #f3f3f3;
border-top: 4px solid #3498db;
border-radius: 50%;
width: 40px;
height: 40px;
animation: spin 1s linear infinite;
margin: 0 auto 1rem;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
img {
max-width: 100%;
height: auto;
border-radius: 8px;
display: none; /* 初始隐藏 */
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.status {
color: #666;
margin-bottom: 1rem;
}
</style>
</head>
<body>
<div class="container">
<div id="loading-state">
<div class="loader"></div>
<div class="status">Generating snapshot for<br><strong>${safeTarget}</strong>...</div>
<div style="font-size: 12px; color: #999;">This may take up to 30 seconds if not cached.</div>
</div>
<img id="result-img" src="${safeRawUrl}" alt="Snapshot" onload="showImage()" onerror="showError()">
<div id="error-state" style="display:none; color: #e74c3c;">
Failed to load snapshot.
</div>
</div>
<script>
function showImage() {
document.getElementById('loading-state').style.display = 'none';
document.getElementById('result-img').style.display = 'block';
}
function showError() {
document.getElementById('loading-state').style.display = 'none';
document.getElementById('error-state').style.display = 'block';
}
</script>
</body>
</html>
`;
}

/**
 * Shared tail of both routes: resolve the target, then either send the
 * loading page (browser navigation) or proxy the screenshot.
 *
 * Express 4 does not catch rejections from async handlers, so failures are
 * handled here rather than being left as unhandled rejections.
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {string} rawPath - Path (plus query) identifying the target site.
 * @returns {Promise<any>}
 */
async function serveSnapshot(req, res, rawPath) {
  try {
    // Resolve the target URL, adding a scheme when it is missing.
    const targetUrl = await resolveTargetUrl(rawPath);
    // Full upstream URL.
    const upstreamUrl = `${UPSTREAM_HOST}/mshots/v1/${targetUrl}`;

    // Browser convenience: when the client accepts text/html and has not asked
    // for the raw image (?raw=...), send a page with a loading animation that
    // requests the real image itself.
    const accept = req.headers.accept;
    if (accept && accept.includes('text/html') && !req.query.raw) {
      const separator = req.originalUrl.includes('?') ? '&' : '?';
      const rawUrl = `${req.originalUrl}${separator}raw=true`;
      return res.type('text/html').send(renderLoadingPage(targetUrl, rawUrl));
    }
    return await handleProxyRequest(res, upstreamUrl, targetUrl);
  } catch (err) {
    console.error(`[request-error] url=${req.originalUrl} err=${err.message}`);
    if (!res.headersSent) {
      return res.status(500).type('text/plain').send('Internal Server Error');
    }
  }
}

// mShots-compatible route: /mshots/v1/...
app.use('/mshots/v1', async (req, res) => {
  if (req.method !== 'GET') {
    return res.status(405).type('text/plain').send('Method Not Allowed');
  }
  // req.originalUrl still contains the /mshots/v1 prefix (for example
  // /mshots/v1/www.baidu.com), so strip it to obtain the target part.
  const prefix = '/mshots/v1';
  let pathPart = req.originalUrl;
  if (pathPart.startsWith(prefix)) {
    pathPart = pathPart.slice(prefix.length);
  }
  return serveSnapshot(req, res, pathPart);
});

// Root-level route: /https://example.com or /www.baidu.com
app.use(async (req, res) => {
  if (req.method !== 'GET') {
    return res.status(405).type('text/plain').send('Method Not Allowed');
  }
  if (req.path === '/') {
    return res.type('text/plain').send('mShots proxy is running. Try /https://www.baidu.com or /www.baidu.com');
  }
  return serveSnapshot(req, res, req.originalUrl);
});
// Simple health check for the root route:
// the root path is answered by the catch-all app.use handler above.
const announceReady = () => console.log(`Proxy running at http://localhost:${PORT}`);
app.listen(PORT, announceReady);