const express = require('express');
const axios = require('axios');
const fs = require('fs');
const fsPromises = require('fs').promises;
const path = require('path');
const crypto = require('crypto');
const net = require('net');
require('dotenv').config();

const app = express();
const PORT = Number(process.env.PORT) || 11489;
const UPSTREAM_HOST = 'https://s0.wp.com';
const FALLBACK_HOST_BASE = 'https://image.thum.io/get/width/1024/crop/768/noanimate';
const CACHE_DIR = path.join(process.cwd(), 'cache');

// Cache-Control sent with valid images (fresh, cached or stale).
const CACHE_CONTROL_IMMUTABLE = 'public, max-age=315360000, immutable';
// Cache-Control sent with anything that is NOT a valid image, so that
// clients retry instead of pinning an error or placeholder.
const CACHE_CONTROL_NO_STORE = 'no-store';
// On-disk cache entries older than this (30 days) are considered expired.
const CACHE_MAX_AGE_MS = 30 * 24 * 3600 * 1000;

fs.mkdirSync(CACHE_DIR, { recursive: true });

// Request coalescing (deduplication): cache key -> in-flight fetchAndCache promise.
const pendingRequests = new Map();

/**
 * Compute the hex-encoded SHA-1 hash of a string.
 * @param {string} input
 * @returns {string}
 */
function sha1(input) {
  return crypto.createHash('sha1').update(input).digest('hex');
}

/**
 * Build the cache file paths (payload and metadata) for a cache key.
 * @param {string} key
 * @returns {{data: string, meta: string}}
 */
function getCachePaths(key) {
  return {
    data: path.join(CACHE_DIR, `${key}.data`),
    meta: path.join(CACHE_DIR, `${key}.json`),
  };
}

/**
 * Decide whether a response is a usable image.
 * @param {number} status HTTP status code
 * @param {object} headers response headers (lower-cased keys)
 * @param {any} data response body (array or something exposing byteLength)
 * @returns {boolean}
 */
function isValidImageResponse(status, headers, data) {
  const ct = (headers['content-type'] || '').toLowerCase();
  const lenHeader = headers['content-length'];
  const len = Array.isArray(data) ? data.length : (data?.byteLength || 0);
  const hasPositiveLength = lenHeader ? parseInt(lenHeader, 10) > 0 : len > 0;

  // Reject small GIFs: these are usually the mShots "Generating" placeholder
  // (about 9 KB). They are not cached so a later request can fetch the real
  // screenshot.
  if (ct.includes('image/gif') && len < 15000) {
    return false;
  }

  return status === 200 && ct.startsWith('image/') && hasPositiveLength;
}

/**
 * Check whether a host accepts TCP connections on port 443 (a cheap
 * "does it speak HTTPS" probe). Gives up after 1.5 seconds.
 * @param {string} host
 * @returns {Promise<boolean>} true when the connection succeeded
 */
function checkPort443(host) {
  return new Promise((resolve) => {
    // net.connect() treats an empty host as localhost; never probe ourselves.
    if (!host) {
      resolve(false);
      return;
    }

    const socket = net.connect(443, host);
    let settled = false;
    const finish = (isOpen) => {
      if (settled) return;
      settled = true;
      socket.destroy();
      resolve(isOpen);
    };

    // Short timeout so a silent host does not block the request for long.
    socket.setTimeout(1500);
    socket.on('connect', () => finish(true));
    socket.on('error', () => finish(false));
    socket.on('timeout', () => finish(false));
  });
}

/**
 * Turn the request path into a target URL, adding a protocol when missing.
 * @param {string} rawPath request path, e.g. "/www.example.com/page"
 * @returns {Promise<string>} absolute http(s) URL
 */
async function resolveTargetUrl(rawPath) {
  // Drop the leading slash.
  const target = rawPath.startsWith('/') ? rawPath.slice(1) : rawPath;

  // A protocol is already present: use it as is.
  if (target.startsWith('http://') || target.startsWith('https://')) {
    return target;
  }

  // Extract the host name and strip an explicit port, if any.
  let host = target.split('/')[0].split('?')[0];
  const colonIndex = host.indexOf(':');
  if (colonIndex !== -1) {
    host = host.substring(0, colonIndex);
  }

  // Prefer HTTPS when port 443 is reachable.
  const isHttps = await checkPort443(host);
  if (!isHttps) {
    console.log(`[protocol-detect] ${host} : 443 port closed or timeout, falling back to HTTP.`);
  }

  return isHttps ? `https://${target}` : `http://${target}`;
}

/**
 * Request the upstream (mShots), retrying on transport errors.
 * Non-2xx responses are returned, not thrown, so the caller can inspect them.
 * @param {string} upstreamUrl
 * @param {number} tries maximum number of attempts
 * @returns {Promise<object>} axios response with an arraybuffer body
 * @throws the last transport error when every attempt failed
 */
async function fetchUpstreamWithRetry(upstreamUrl, tries = 2) {
  let lastErr = new Error('fetchUpstreamWithRetry: no attempt was made');

  for (let attempt = 1; attempt <= tries; attempt++) {
    try {
      return await axios.get(upstreamUrl, {
        responseType: 'arraybuffer',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36',
          'Accept': 'image/avif,image/webp,image/apng,image/*;q=0.8,*/*;q=0.5',
          'Host': 's0.wp.com',
        },
        maxRedirects: 5,
        timeout: 20000,
        // Ignore proxy settings coming from environment variables.
        proxy: false,
        // Let non-2xx responses through; the caller decides what is valid.
        validateStatus: () => true,
      });
    } catch (err) {
      lastErr = err;
      console.error(`[upstream-error] try=${attempt} url=${upstreamUrl} msg=${err.message}`);
      // Simple back-off, skipped after the final attempt.
      if (attempt < tries) {
        await new Promise((r) => setTimeout(r, 300));
      }
    }
  }

  throw lastErr;
}

/**
 * Request the fallback screenshot service (thum.io). Makes a single attempt.
 * @param {string} targetUrl
 * @returns {Promise<object|null>} axios response, or null on transport error
 */
async function fetchFallbackWithRetry(targetUrl) {
  // thum.io format: <FALLBACK_HOST_BASE>/<target url>
  const fallbackUrl = `${FALLBACK_HOST_BASE}/${targetUrl}`;
  console.log(`[fallback-request] trying fallback: ${fallbackUrl}`);

  try {
    return await axios.get(fallbackUrl, {
      responseType: 'arraybuffer',
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
      },
      timeout: 30000,
      validateStatus: () => true,
    });
  } catch (err) {
    console.error(`[fallback-error] msg=${err.message}`);
    return null;
  }
}

/**
 * Fetch from upstream, try the fallback when upstream is not a valid image,
 * and write valid images to the on-disk cache.
 * @param {string} upstreamUrl
 * @param {string} targetUrl
 * @param {string} key cache key
 * @returns {Promise<{status: number, headers: object, data: Buffer, contentType: string, isFallback: boolean, cacheable: boolean}>}
 *   `cacheable` is true only when the returned body is a valid image.
 * @throws when the upstream request fails at transport level
 */
async function fetchAndCache(upstreamUrl, targetUrl, key) {
  const { data: dataPath, meta: metaPath } = getCachePaths(key);

  // 1. Upstream request, then fallback if the upstream body is unusable.
  let finalResp;
  let isFallback = false;
  try {
    const resp = await fetchUpstreamWithRetry(upstreamUrl);
    finalResp = resp;

    // 2. Validate the response.
    if (!isValidImageResponse(resp.status, resp.headers, resp.data)) {
      console.log(`[upstream-invalid] url=${upstreamUrl} status=${resp.status} len=${resp.data.byteLength}, trying fallback...`);
      const fallbackResp = await fetchFallbackWithRetry(targetUrl);
      if (fallbackResp && isValidImageResponse(fallbackResp.status, fallbackResp.headers, fallbackResp.data)) {
        console.log(`[fallback-success] url=${targetUrl}`);
        finalResp = fallbackResp;
        isFallback = true;
      } else {
        console.log(`[fallback-failed] url=${targetUrl}, returning original response`);
      }
    }
  } catch (err) {
    console.error(`[upstream-failed] url=${upstreamUrl} err=${err.message}`);
    // Rethrow so the caller can fall back to a stale cache entry.
    throw err;
  }

  // 3. Anything that is not a valid image is returned untouched and uncached.
  if (!isValidImageResponse(finalResp.status, finalResp.headers, finalResp.data)) {
    return {
      status: finalResp.status,
      headers: finalResp.headers,
      data: finalResp.data,
      contentType: finalResp.headers['content-type'],
      isFallback,
      cacheable: false,
    };
  }

  const contentType = finalResp.headers['content-type'] || 'image/jpeg';
  const meta = {
    url: isFallback ? `fallback:${targetUrl}` : upstreamUrl,
    contentType,
    size: finalResp.data.byteLength,
    createdAt: new Date().toISOString(),
    source: isFallback ? 'thum.io' : 'mshots',
  };

  // Write to temporary files first, then rename into place, so readers never
  // observe a partially written file.
  const tempSuffix = `.${Date.now()}-${Math.random().toString(36).slice(2)}.tmp`;
  const tempDataPath = dataPath + tempSuffix;
  const tempMetaPath = metaPath + tempSuffix;
  try {
    await fsPromises.writeFile(tempDataPath, finalResp.data);
    await fsPromises.writeFile(tempMetaPath, JSON.stringify(meta));
    await fsPromises.rename(tempDataPath, dataPath);
    await fsPromises.rename(tempMetaPath, metaPath);
  } catch (e) {
    // A cache write failure must not fail the request: log and clean up.
    console.error(`[cache-write-error] ${e.message}`);
    // Best-effort cleanup; the temp files may not exist any more.
    await fsPromises.unlink(tempDataPath).catch(() => {});
    await fsPromises.unlink(tempMetaPath).catch(() => {});
  }

  return {
    status: 200,
    headers: finalResp.headers,
    data: finalResp.data,
    contentType,
    isFallback,
    cacheable: true,
  };
}

/**
 * Stream a cached payload file to the response.
 * Read errors are handled here because an 'error' event without a listener
 * would terminate the process.
 * @param {object} res Express response
 * @param {string} dataPath path of the cached payload
 */
function streamCachedFile(res, dataPath) {
  const stream = fs.createReadStream(dataPath);

  stream.on('error', (err) => {
    console.error(`[cache-read-error] ${dataPath}: ${err.message}`);
    if (!res.headersSent) {
      res.set('Cache-Control', CACHE_CONTROL_NO_STORE);
      res.status(500).type('text/plain').send('Cache read error');
    } else {
      // Part of the body is already out; the only option is to abort.
      res.destroy();
    }
  });

  // pipe() does not close the source when the client goes away.
  res.on('close', () => stream.destroy());

  stream.pipe(res);
}

/**
 * Serve a cache entry if it exists and has not expired.
 * @param {object} res Express response
 * @param {string} upstreamUrl used for logging only
 * @param {string} dataPath
 * @param {string} metaPath
 * @returns {Promise<boolean>} true when the response was served from cache
 */
async function serveFreshCache(res, upstreamUrl, dataPath, metaPath) {
  let metaRaw;
  try {
    await fsPromises.access(dataPath);
    metaRaw = await fsPromises.readFile(metaPath, 'utf8');
  } catch (_) {
    // Cache miss: one of the two files is absent or unreadable.
    return false;
  }

  let meta = null;
  try {
    meta = JSON.parse(metaRaw);
  } catch (_) {
    console.warn(`[cache-warn] meta corrupted for ${upstreamUrl}`);
  }
  if (!meta || typeof meta.contentType !== 'string') {
    return false;
  }

  // An unparsable createdAt yields NaN, which is treated as expired.
  const ageMs = Date.now() - new Date(meta.createdAt).getTime();
  if (!(ageMs < CACHE_MAX_AGE_MS)) {
    console.log(`[cache-expired] ${upstreamUrl}`);
    return false;
  }

  console.log(`[cache-hit] ${upstreamUrl}`);
  res.type(meta.contentType);
  res.set('Cache-Control', CACHE_CONTROL_IMMUTABLE);
  res.set('X-Source', meta.source === 'thum.io' ? 'fallback-thum.io-cache' : 'mshots-cache');
  streamCachedFile(res, dataPath);
  return true;
}

/**
 * Serve a cache entry regardless of its age. Used when upstream failed.
 * @param {object} res Express response
 * @param {string} upstreamUrl used for logging only
 * @param {string} dataPath
 * @param {string} metaPath
 * @returns {Promise<boolean>} true when a stale image was served
 */
async function serveStaleCache(res, upstreamUrl, dataPath, metaPath) {
  let meta;
  try {
    await fsPromises.access(dataPath);
    meta = JSON.parse(await fsPromises.readFile(metaPath, 'utf8'));
  } catch (_) {
    // No usable cache entry to fall back on.
    return false;
  }

  const contentType = meta && meta.contentType;
  if (typeof contentType !== 'string' || !contentType.toLowerCase().startsWith('image/')) {
    return false;
  }

  console.log(`[fallback-cache] using stale cache for ${upstreamUrl}`);
  res.set('Cache-Control', CACHE_CONTROL_IMMUTABLE);
  res.type(contentType);
  streamCachedFile(res, dataPath);
  return true;
}

/**
 * Core flow: cache lookup -> (coalesced) upstream fetch -> fallback service
 * -> cache write -> response. Falls back to a stale cache entry when the
 * upstream fetch fails, and to 502 when there is none.
 * @param {object} res Express response
 * @param {string} upstreamUrl
 * @param {string} targetUrl
 */
async function handleProxyRequest(res, upstreamUrl, targetUrl) {
  const key = sha1(upstreamUrl);
  const { data: dataPath, meta: metaPath } = getCachePaths(key);

  // 1. Fresh cache entry.
  if (await serveFreshCache(res, upstreamUrl, dataPath, metaPath)) {
    return;
  }

  // 2. Upstream fetch, shared between concurrent requests for the same key.
  try {
    let resultPromise = pendingRequests.get(key);
    if (resultPromise) {
      console.log(`[coalesce-hit] joining pending request for ${upstreamUrl}`);
    } else {
      resultPromise = fetchAndCache(upstreamUrl, targetUrl, key);
      pendingRequests.set(key, resultPromise);
      // Remove the entry once settled. then(f, f) is used instead of
      // finally(f): finally() returns a promise that re-rejects, and nothing
      // would handle that rejection.
      const release = () => {
        pendingRequests.delete(key);
      };
      resultPromise.then(release, release);
    }

    const result = await resultPromise;

    res.status(result.status);
    if (result.contentType) res.type(result.contentType);
    // Only valid images may be pinned by clients; errors and placeholders
    // must be re-requested.
    res.set('Cache-Control', result.cacheable ? CACHE_CONTROL_IMMUTABLE : CACHE_CONTROL_NO_STORE);
    if (result.isFallback) {
      res.set('X-Source', 'fallback-thum.io');
    }
    return res.send(result.data);
  } catch (err) {
    // 3. Upstream failed: serve a stale cache entry if there is one.
    if (await serveStaleCache(res, upstreamUrl, dataPath, metaPath)) {
      return;
    }

    console.error(`[upstream-failed-final] url=${upstreamUrl} err=${err.message}`);
    if (!res.headersSent) {
      return res.status(502).type('text/plain').send('Upstream error');
    }
  }
}

/**
 * Escape a value for safe inclusion in HTML text or attribute values.
 * @param {string} text
 * @returns {string}
 */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Remove the proxy's own `raw` query parameter from a request URL so it is
 * not forwarded as part of the target URL (and does not change the cache key).
 * URLs without a `raw` parameter are returned unchanged.
 * @param {string} url request URL (path plus optional query string)
 * @returns {string}
 */
function stripRawParam(url) {
  const queryStart = url.indexOf('?');
  if (queryStart === -1) return url;

  const pairs = url.slice(queryStart + 1).split('&');
  const kept = pairs.filter((pair) => pair.split('=')[0] !== 'raw');
  if (kept.length === pairs.length) return url;

  const base = url.slice(0, queryStart);
  return kept.length > 0 ? `${base}?${kept.join('&')}` : base;
}

/**
 * Build the HTML page shown to browsers: a status message plus an <img>
 * that loads the real screenshot from rawUrl and replaces the message once
 * loaded.
 * @param {string} targetUrl URL being captured (displayed, escaped)
 * @param {string} rawUrl URL of the image endpoint (escaped)
 * @returns {string} HTML document
 */
function renderLoadingPage(targetUrl, rawUrl) {
  const safeTarget = escapeHtml(targetUrl);
  const safeRawUrl = escapeHtml(rawUrl);
  return `<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Snapshot Loading...</title>
</head>
<body>
<div id="status">
<p>Generating snapshot for<br>
${safeTarget}...</p>
<p>This may take up to 30 seconds if not cached.</p>
</div>
<img src="${safeRawUrl}" alt="Snapshot" style="display:none"
  onload="this.style.display='block';document.getElementById('status').style.display='none';"
  onerror="document.getElementById('status').textContent='Failed to load snapshot.';">
</body>
</html>
`;
}

/**
 * Shared request flow for both routes: resolve the target, then either send
 * the HTML loading page (browser navigation) or the image itself.
 * @param {object} req Express request
 * @param {object} res Express response
 * @param {string} rawPath request URL with any route prefix already removed
 */
async function serveSnapshot(req, res, rawPath) {
  try {
    // Resolve the target URL (adds the protocol when missing).
    const targetUrl = await resolveTargetUrl(stripRawParam(rawPath));
    // Full upstream URL.
    const upstreamUrl = UPSTREAM_HOST + '/mshots/v1/' + targetUrl;

    // Browser navigation (Accept contains text/html, no ?raw=true): answer
    // with an HTML page that requests the real image itself.
    const wantsHtml = req.headers.accept && req.headers.accept.includes('text/html') && !req.query.raw;
    if (wantsHtml) {
      const rawUrl = req.originalUrl.includes('?')
        ? `${req.originalUrl}&raw=true`
        : `${req.originalUrl}?raw=true`;
      return res.type('text/html').send(renderLoadingPage(targetUrl, rawUrl));
    }

    return await handleProxyRequest(res, upstreamUrl, targetUrl);
  } catch (err) {
    // Without this, a rejection from an async handler would go unhandled.
    console.error(`[request-error] url=${req.originalUrl} err=${err.message}`);
    if (!res.headersSent) {
      return res.status(500).type('text/plain').send('Internal error');
    }
  }
}

// Reverse proxy for mShots under /mshots/v1/...
app.use('/mshots/v1', (req, res) => {
  if (req.method !== 'GET') {
    return res.status(405).type('text/plain').send('Method Not Allowed');
  }

  // req.originalUrl still carries the /mshots/v1 prefix, e.g.
  // /mshots/v1/www.baidu.com; strip it to get the part to resolve.
  const prefix = '/mshots/v1';
  const pathPart = req.originalUrl.startsWith(prefix)
    ? req.originalUrl.slice(prefix.length)
    : req.originalUrl;

  return serveSnapshot(req, res, pathPart);
});

// Root-level form: /https://example.com or /www.baidu.com
// The bare root path doubles as a health check.
app.use((req, res) => {
  if (req.method !== 'GET') {
    return res.status(405).type('text/plain').send('Method Not Allowed');
  }

  if (req.path === '/') {
    return res.type('text/plain').send('mShots proxy is running. Try /https://www.baidu.com or /www.baidu.com');
  }

  return serveSnapshot(req, res, req.originalUrl);
});

app.listen(PORT, () => {
  console.log(`Proxy running at http://localhost:${PORT}`);
});