1
0
Files
WordPress-mShots-Proxy/server.js

587 lines
18 KiB
JavaScript
Raw Normal View History

const express = require('express');
const axios = require('axios');
const fs = require('fs');
const fsPromises = require('fs').promises;
const path = require('path');
const crypto = require('crypto');
const net = require('net');
require('dotenv').config();
// Express application instance; every route in this file is registered on it.
const app = express();
// Listening port: the PORT environment variable when it parses to a non-zero
// number, otherwise 11489.
const PORT = Number(process.env.PORT) || 11489;
// Origin of the primary screenshot service; route handlers append
// '/mshots/v1/<target>' to it.
const UPSTREAM_HOST = 'https://s0.wp.com';
// Base URL of the fallback screenshot service; the target URL is appended
// after a '/' when the primary response is not a usable image.
const FALLBACK_HOST_BASE = 'https://image.thum.io/get/width/1024/crop/768/noanimate';
// On-disk cache directory: 'cache' under the process working directory.
const CACHE_DIR = path.join(process.cwd(), 'cache');
// Create the cache directory (and any missing parents) synchronously at startup.
fs.mkdirSync(CACHE_DIR, { recursive: true });
// Request coalescing (deduplication): maps a cache key to the promise of the
// fetch currently in flight for it, so concurrent requests share one fetch.
const pendingRequests = new Map();
/**
 * Compute the SHA-1 digest of a value and return it as lowercase hex.
 * @param {string} input - Data to hash.
 * @returns {string} 40-character hexadecimal digest.
 */
function sha1(input) {
  const hasher = crypto.createHash('sha1');
  hasher.update(input);
  return hasher.digest('hex');
}
/**
 * Build the pair of cache file paths that belong to a cache key.
 * @param {string} key - Cache key (used verbatim as the file base name).
 * @returns {{data: string, meta: string}} `data` is the body file
 *   (`<key>.data`), `meta` is the metadata file (`<key>.json`); both live
 *   directly inside CACHE_DIR.
 */
function getCachePaths(key) {
  const inCacheDir = (extension) => path.join(CACHE_DIR, `${key}.${extension}`);
  const data = inCacheDir('data');
  const meta = inCacheDir('json');
  return { data, meta };
}
/**
 * Decide whether an HTTP response is a usable image.
 *
 * A response qualifies when the status is 200, the content type starts with
 * `image/`, and it has a positive length (the Content-Length header when
 * present, otherwise the size of the body itself).
 *
 * @param {number} status - HTTP status code.
 * @param {object} headers - Response headers, keyed by lowercase name.
 * @param {any} data - Response body (array, or anything exposing `byteLength`).
 * @returns {boolean} true when the response can be served and cached as an image.
 */
function isValidImageResponse(status, headers, data) {
  const contentType = (headers['content-type'] || '').toLowerCase();
  const bodyBytes = Array.isArray(data) ? data.length : (data?.byteLength || 0);

  // Small GIFs are rejected: they are usually the mShots "Generating"
  // placeholder (about 9 KB). Not caching them lets a later request retry
  // for the real screenshot.
  if (contentType.includes('image/gif') && bodyBytes < 15000) {
    return false;
  }

  if (status !== 200) {
    return false;
  }
  if (!contentType.startsWith('image/')) {
    return false;
  }

  const declaredLength = headers['content-length'];
  if (declaredLength) {
    return parseInt(declaredLength, 10) > 0;
  }
  return bodyBytes > 0;
}
/**
 * Probe whether a host accepts TCP connections on port 443 (a cheap
 * "does it speak HTTPS" heuristic; no TLS handshake is performed).
 * @param {string} host - Host name or address to probe.
 * @returns {Promise<boolean>} Resolves true once the connection is
 *   established, false on a socket error or when the socket times out.
 */
function checkPort443(host) {
  return new Promise((resolve) => {
    const probe = net.connect(443, host);
    // 1.5 second socket timeout so a silent host cannot stall the request for long.
    probe.setTimeout(1500);

    const onConnect = () => {
      probe.end();
      resolve(true);
    };
    const onError = () => {
      resolve(false);
    };
    const onTimeout = () => {
      probe.destroy();
      resolve(false);
    };

    probe.on('connect', onConnect);
    probe.on('error', onError);
    probe.on('timeout', onTimeout);
  });
}
/**
 * Turn the path portion of an incoming request into an absolute target URL,
 * adding a scheme when the caller omitted one.
 *
 * - A target that already starts with `http://` or `https://` (matched
 *   case-insensitively, since URL schemes are case-insensitive) is returned
 *   unchanged.
 * - Otherwise the host is extracted (everything before the first `/` or `?`,
 *   with any `:port` removed) and probed on port 443; `https://` is used when
 *   the probe succeeds, `http://` otherwise.
 * - When no host can be extracted (empty path, or a path that is only a query
 *   string) the probe is skipped and `http://` is used, because probing an
 *   empty host would connect to the local machine instead of the target.
 *
 * @param {string} rawPath - Request path, with or without a leading `/`.
 * @returns {Promise<string>} The target URL including a scheme.
 */
async function resolveTargetUrl(rawPath) {
  // Drop the single leading slash that separates the route from the target.
  const target = rawPath.startsWith('/') ? rawPath.slice(1) : rawPath;

  // Already absolute: return as-is.
  if (/^https?:\/\//i.test(target)) {
    return target;
  }

  // Host = text before the first '/' or '?', without the port.
  let host = target.split('/')[0].split('?')[0];
  const colonIndex = host.indexOf(':');
  if (colonIndex !== -1) {
    host = host.substring(0, colonIndex);
  }

  // Only probe when there is a host to probe.
  const isHttps = host !== '' && await checkPort443(host);
  if (!isHttps) {
    console.log(`[protocol-detect] ${host} : 443 port closed or timeout, falling back to HTTP.`);
  }
  return isHttps ? `https://${target}` : `http://${target}`;
}
/**
 * GET the upstream screenshot URL, retrying on transport-level failures.
 *
 * Any HTTP status is returned to the caller (`validateStatus` accepts them
 * all); only thrown errors such as timeouts or connection failures trigger a
 * retry. A 300 ms pause separates attempts, and there is no pause after the
 * last attempt, so the final error is reported without extra delay.
 *
 * @param {string} upstreamUrl - Full upstream URL to request.
 * @param {number} tries - Maximum number of attempts (default 2).
 * @returns {Promise<any>} The axios response; `data` is requested as an arraybuffer.
 * @throws The error of the last failed attempt, or an Error when `tries`
 *   is less than 1 and no attempt could be made.
 */
async function fetchUpstreamWithRetry(upstreamUrl, tries = 2) {
  let lastErr = null;
  for (let attempt = 1; attempt <= tries; attempt++) {
    try {
      return await axios.get(upstreamUrl, {
        responseType: 'arraybuffer',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36',
          'Accept': 'image/avif,image/webp,image/apng,image/*;q=0.8,*/*;q=0.5',
          'Host': 's0.wp.com',
        },
        maxRedirects: 5,
        timeout: 20000,
        // Do not let proxy environment variables interfere.
        proxy: false,
        // Hand non-2xx responses to the caller instead of throwing.
        validateStatus: () => true,
      });
    } catch (err) {
      lastErr = err;
      console.error(`[upstream-error] try=${attempt} url=${upstreamUrl} msg=${err.message}`);
      // Simple fixed backoff, only when another attempt follows.
      if (attempt < tries) {
        await new Promise((resolve) => setTimeout(resolve, 300));
      }
    }
  }
  throw lastErr ?? new Error(`fetchUpstreamWithRetry: no attempt made (tries=${tries})`);
}
/**
 * Request a screenshot from the fallback service (thum.io).
 *
 * Despite the name, a single attempt is made. Any HTTP status is returned to
 * the caller; a thrown transport error is logged and reported as `null`.
 *
 * @param {string} targetUrl - URL of the page to capture.
 * @returns {Promise<any>} The axios response, or null when the request threw.
 */
async function fetchFallbackWithRetry(targetUrl) {
  // thum.io URL layout: https://image.thum.io/get/<options>/<url>
  const fallbackUrl = `${FALLBACK_HOST_BASE}/${targetUrl}`;
  console.log(`[fallback-request] trying fallback: ${fallbackUrl}`);

  const requestOptions = {
    responseType: 'arraybuffer',
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    },
    timeout: 30000,
    validateStatus: () => true,
  };

  let response = null;
  try {
    response = await axios.get(fallbackUrl, requestOptions);
  } catch (err) {
    console.error(`[fallback-error] msg=${err.message}`);
  }
  return response;
}
/**
 * Fetch a screenshot from upstream, fall back to the secondary service when
 * the upstream answer is not a usable image, and persist usable images to the
 * disk cache.
 *
 * @param {string} upstreamUrl - Full upstream (mShots) URL.
 * @param {string} targetUrl - URL of the page being captured (used for the fallback).
 * @param {string} key - Cache key that determines the cache file names.
 * @returns {Promise<{status: number, headers: object, data: Buffer, contentType: string, isFallback: boolean}>}
 *   For a usable image, status is 200 and contentType defaults to
 *   'image/jpeg' when the response carries none. Otherwise the chosen
 *   response's own status and content type are passed through and nothing is
 *   cached.
 * @throws Rethrows whatever the upstream fetch throws, so the caller can
 *   fall back to a stale cache entry.
 */
async function fetchAndCache(upstreamUrl, targetUrl, key) {
  const cachePaths = getCachePaths(key);

  // 1. Ask upstream first; `chosen` ends up being the response we answer with.
  let chosen;
  let usedFallback = false;
  try {
    const primary = await fetchUpstreamWithRetry(upstreamUrl);
    chosen = primary;

    // 2. Upstream answered with something unusable: try the fallback service.
    const primaryUsable = isValidImageResponse(primary.status, primary.headers, primary.data);
    if (!primaryUsable) {
      console.log(`[upstream-invalid] url=${upstreamUrl} status=${primary.status} len=${primary.data.byteLength}, trying fallback...`);
      const secondary = await fetchFallbackWithRetry(targetUrl);
      const secondaryUsable = secondary
        && isValidImageResponse(secondary.status, secondary.headers, secondary.data);
      if (secondaryUsable) {
        console.log(`[fallback-success] url=${targetUrl}`);
        chosen = secondary;
        usedFallback = true;
      } else {
        console.log(`[fallback-failed] url=${targetUrl}, returning original response`);
      }
    }
  } catch (err) {
    console.error(`[upstream-failed] url=${upstreamUrl} err=${err.message}`);
    // Total upstream failure: propagate so the caller can handle it
    // (for example by serving an old cache entry).
    throw err;
  }

  // 3. Unusable responses are returned as-is and never cached.
  if (!isValidImageResponse(chosen.status, chosen.headers, chosen.data)) {
    return {
      status: chosen.status,
      headers: chosen.headers,
      data: chosen.data,
      contentType: chosen.headers['content-type'],
      isFallback: usedFallback
    };
  }

  const contentType = chosen.headers['content-type'] || 'image/jpeg';
  const meta = {
    url: usedFallback ? `fallback:${targetUrl}` : upstreamUrl,
    contentType,
    size: chosen.data.byteLength,
    createdAt: new Date().toISOString(),
    source: usedFallback ? 'thum.io' : 'mshots'
  };

  // Write to uniquely named temp files first, then rename them into place,
  // so readers never observe a partially written file.
  const tempSuffix = `.${Date.now()}-${Math.random().toString(36).slice(2)}.tmp`;
  const tempDataPath = cachePaths.data + tempSuffix;
  const tempMetaPath = cachePaths.meta + tempSuffix;
  try {
    await fsPromises.writeFile(tempDataPath, chosen.data);
    await fsPromises.writeFile(tempMetaPath, JSON.stringify(meta));
    await fsPromises.rename(tempDataPath, cachePaths.data);
    await fsPromises.rename(tempMetaPath, cachePaths.meta);
  } catch (writeErr) {
    // A cache write failure is logged but does not fail the request.
    console.error(`[cache-write-error] ${writeErr.message}`);
    // Best-effort removal of whatever temp files are left behind.
    for (const leftover of [tempDataPath, tempMetaPath]) {
      try { await fsPromises.unlink(leftover); } catch (_) {}
    }
  }

  return {
    status: 200,
    headers: chosen.headers,
    data: chosen.data,
    contentType,
    isFallback: usedFallback
  };
}
/**
 * Stream a cache file into the response and handle read failures.
 *
 * Without an 'error' listener a failing read stream (for example when the
 * file disappears after the existence check) would raise an uncaught
 * exception and leave the response open.
 *
 * @param {object} res - Express response.
 * @param {string} filePath - Path of the cached body file.
 */
function pipeCacheFile(res, filePath) {
  const stream = fs.createReadStream(filePath);
  stream.on('error', (err) => {
    console.error(`[cache-read-error] ${filePath}: ${err.message}`);
    if (res.headersSent) {
      // Part of the body is already on the wire; all we can do is abort.
      res.destroy(err);
      return;
    }
    // Replace the long-lived caching header set for the cache hit.
    res.set('Cache-Control', 'no-store');
    res.status(500).type('text/plain').send('Cache read error');
  });
  stream.pipe(res);
}

/**
 * Core request flow: serve from cache -> otherwise fetch (coalescing
 * concurrent requests for the same key) -> fallback service -> write cache ->
 * respond. When fetching fails outright, a stale cache entry is served if one
 * exists, otherwise 502.
 *
 * Only results that pass isValidImageResponse are sent with the long-lived
 * `Cache-Control` header; pass-through results (upstream errors, the
 * "Generating" placeholder) are sent with `no-store` so clients retry.
 *
 * @param {object} res - Express response.
 * @param {string} upstreamUrl - Full upstream URL; its SHA-1 is the cache key.
 * @param {string} targetUrl - URL of the page being captured.
 */
async function handleProxyRequest(res, upstreamUrl, targetUrl) {
  const key = sha1(upstreamUrl);
  const { data: dataPath, meta: metaPath } = getCachePaths(key);
  const longLivedCacheControl = 'public, max-age=315360000, immutable';

  // 1. Try the disk cache.
  try {
    // Both files must exist for the entry to be usable.
    await fsPromises.access(dataPath);
    await fsPromises.access(metaPath);
    const metaRaw = await fsPromises.readFile(metaPath, 'utf8');
    let meta;
    try {
      meta = JSON.parse(metaRaw);
    } catch (e) {
      console.warn(`[cache-warn] meta corrupted for ${upstreamUrl}`);
    }
    if (meta) {
      // Entries older than 30 days are treated as expired.
      const cachedTime = new Date(meta.createdAt).getTime();
      const now = Date.now();
      const maxAge = 30 * 24 * 3600 * 1000;
      if (now - cachedTime < maxAge) {
        console.log(`[cache-hit] ${upstreamUrl}`);
        res.type(meta.contentType);
        res.set('Cache-Control', longLivedCacheControl);
        if (meta.source === 'thum.io') {
          res.set('X-Source', 'fallback-thum.io-cache');
        } else {
          res.set('X-Source', 'mshots-cache');
        }
        pipeCacheFile(res, dataPath);
        return;
      } else {
        console.log(`[cache-expired] ${upstreamUrl}`);
      }
    }
  } catch (err) {
    // Cache miss (or unreadable entry): fall through to fetching.
  }

  // 2. Fetch, sharing one in-flight fetch per key.
  try {
    let resultPromise;
    if (pendingRequests.has(key)) {
      console.log(`[coalesce-hit] joining pending request for ${upstreamUrl}`);
      resultPromise = pendingRequests.get(key);
    } else {
      resultPromise = fetchAndCache(upstreamUrl, targetUrl, key);
      pendingRequests.set(key, resultPromise);
      // Remove the entry once the fetch settles, success or failure. Using
      // then(fn, fn) rather than finally() keeps the derived promise from
      // rejecting, which would surface as an unhandled rejection.
      const release = () => {
        pendingRequests.delete(key);
      };
      resultPromise.then(release, release);
    }
    const result = await resultPromise;

    res.status(result.status);
    if (result.contentType) res.type(result.contentType);
    const isCacheableImage = isValidImageResponse(result.status, result.headers || {}, result.data);
    res.set('Cache-Control', isCacheableImage ? longLivedCacheControl : 'no-store');
    if (result.isFallback) {
      res.set('X-Source', 'fallback-thum.io');
    }
    return res.send(result.data);
  } catch (err) {
    // 3. Fetching failed outright: serve a cache entry if one exists, even an expired one.
    try {
      await fsPromises.access(dataPath);
      await fsPromises.access(metaPath);
      const metaRaw = await fsPromises.readFile(metaPath, 'utf8');
      const meta = JSON.parse(metaRaw);
      if (meta.contentType && meta.contentType.toLowerCase().startsWith('image/')) {
        console.log(`[fallback-cache] using stale cache for ${upstreamUrl}`);
        res.set('Cache-Control', longLivedCacheControl);
        res.type(meta.contentType);
        pipeCacheFile(res, dataPath);
        return;
      }
    } catch (_) {
      // No usable stale entry; report the upstream failure below.
    }
    console.error(`[upstream-failed-final] url=${upstreamUrl} err=${err.message}`);
    if (!res.headersSent) {
      return res.status(502).type('text/plain').send('Upstream error');
    }
  }
}
/**
 * Escape a value for safe interpolation into HTML text or a double-quoted
 * attribute.
 * @param {any} value - Value to escape (converted with String()).
 * @returns {string} The value with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Render the "loading" page shown to browsers: a spinner plus an <img> that
 * requests the real screenshot from `rawUrl`.
 *
 * Both values originate from the request URL, so they are HTML-escaped before
 * being placed in the markup.
 *
 * @param {string} targetUrl - URL of the page being captured (displayed as text).
 * @param {string} rawUrl - URL the page's <img> loads the screenshot from.
 * @returns {string} Complete HTML document.
 */
function renderLoadingPage(targetUrl, rawUrl) {
  const safeTargetUrl = escapeHtml(targetUrl);
  const safeRawUrl = escapeHtml(rawUrl);
  return `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Snapshot Loading...</title>
<style>
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background-color: #f0f2f5;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
}
.container {
text-align: center;
background: white;
padding: 2rem;
border-radius: 12px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
max-width: 90%;
}
.loader {
border: 4px solid #f3f3f3;
border-top: 4px solid #3498db;
border-radius: 50%;
width: 40px;
height: 40px;
animation: spin 1s linear infinite;
margin: 0 auto 1rem;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
img {
max-width: 100%;
height: auto;
border-radius: 8px;
display: none; /* 初始隐藏 */
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.status {
color: #666;
margin-bottom: 1rem;
}
</style>
</head>
<body>
<div class="container">
<div id="loading-state">
<div class="loader"></div>
<div class="status">Generating snapshot for<br><strong>${safeTargetUrl}</strong>...</div>
<div style="font-size: 12px; color: #999;">This may take up to 30 seconds if not cached.</div>
</div>
<img id="result-img" src="${safeRawUrl}" alt="Snapshot" onload="showImage()" onerror="showError()">
<div id="error-state" style="display:none; color: #e74c3c;">
Failed to load snapshot.
</div>
</div>
<script>
function showImage() {
document.getElementById('loading-state').style.display = 'none';
document.getElementById('result-img').style.display = 'block';
}
function showError() {
document.getElementById('loading-state').style.display = 'none';
document.getElementById('error-state').style.display = 'block';
}
</script>
</body>
</html>
`;
}

/**
 * Shared tail of both routes: resolve the target, then answer with either the
 * loading page (browser navigation) or the proxied image.
 *
 * Errors are caught here because Express 4 does not handle a rejected promise
 * returned by a middleware function.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {string} rawPath - Part of the request URL that names the target.
 */
async function serveSnapshot(req, res, rawPath) {
  try {
    // Resolve the target URL, adding a scheme when missing.
    const targetUrl = await resolveTargetUrl(rawPath);
    // Full upstream URL.
    const upstreamUrl = UPSTREAM_HOST + '/mshots/v1/' + targetUrl;

    // Browser convenience: when the client accepts text/html and did not pass
    // ?raw=..., return the loading page, which then requests the image itself.
    const wantsLoadingPage = req.headers.accept
      && req.headers.accept.includes('text/html')
      && !req.query.raw;
    if (wantsLoadingPage) {
      const rawUrl = req.originalUrl.includes('?')
        ? `${req.originalUrl}&raw=true`
        : `${req.originalUrl}?raw=true`;
      return res.type('text/html').send(renderLoadingPage(targetUrl, rawUrl));
    }

    return await handleProxyRequest(res, upstreamUrl, targetUrl);
  } catch (err) {
    console.error(`[request-error] url=${req.originalUrl} err=${err.message}`);
    if (!res.headersSent) {
      return res.status(500).type('text/plain').send('Internal Server Error');
    }
  }
}

// Proxy for mShots-style paths: /mshots/v1/...
app.use('/mshots/v1', (req, res) => {
  if (req.method !== 'GET') {
    return res.status(405).type('text/plain').send('Method Not Allowed');
  }
  // req.originalUrl still contains the /mshots/v1 prefix, e.g.
  // /mshots/v1/www.baidu.com; strip it to get the part naming the target.
  const prefix = '/mshots/v1';
  const pathPart = req.originalUrl.startsWith(prefix)
    ? req.originalUrl.slice(prefix.length)
    : req.originalUrl;
  return serveSnapshot(req, res, pathPart);
});

// Root-level form: /https://example.com or /www.baidu.com
app.use((req, res) => {
  if (req.method !== 'GET') {
    return res.status(405).type('text/plain').send('Method Not Allowed');
  }
  if (req.path === '/') {
    return res.type('text/plain').send('mShots proxy is running. Try /https://www.baidu.com or /www.baidu.com');
  }
  return serveSnapshot(req, res, req.originalUrl);
});
// Simple health check for the root route:
// the root path is answered by the catch-all app.use handler above.
// Start the HTTP server and log the local address once it is listening.
function announceListening() {
  console.log(`Proxy running at http://localhost:${PORT}`);
}
app.listen(PORT, announceListening);