feat(资源抓取): 实现CSS依赖资源的自动抓取与存储

新增对CSS文件中引用的字体、图片等资源的自动抓取功能,保持与源路径相同的层级结构
扩展resolveTargetPath以支持字体和图片类型,新增resolveAssetPathForCss处理依赖路径
添加fetchCssDependencies方法解析CSS中的url引用并下载相关资源
更新README文档说明新增的字体依赖处理机制
This commit is contained in:
2025-12-12 16:46:30 +08:00
parent eaf547981b
commit d47f125cea
6 changed files with 141 additions and 3 deletions

View File

@@ -140,6 +140,12 @@
- 本地保存:`cache/js/cdn.tailwindcss.com/index.js` - 本地保存:`cache/js/cdn.tailwindcss.com/index.js`
- 对外访问:`/js/cdn.tailwindcss.com/index.js` - 对外访问:`/js/cdn.tailwindcss.com/index.js`
### 字体与依赖资源处理（CSS 自动抓取）
- 当抓取 `CSS` 文件时,会自动解析其中的 `url(...)` 引用,并尝试下载相对路径的依赖(如字体、图片等),统一保存到 `cache/css/...` 对应目录下,保持与源路径相同的层级结构。
- 这样,形如 `@font-face { src: url(fonts/element-icons.woff) }` 的引用将会在本地落盘为:`/css/.../fonts/element-icons.woff`,无需跨域请求第三方源。
- 失败的依赖抓取会被静默跳过,不影响主 `CSS` 的可用性。
## 去重策略 ## 去重策略
- 目标路径存在则跳过抓取,响应中返回 `skipped: true` - 目标路径存在则跳过抓取,响应中返回 `skipped: true`
@@ -148,6 +154,7 @@
## 安全与白名单建议 ## 安全与白名单建议
- 推荐在 CDN/WAF 层配置防盗链白名单(如 `*.aaa.com`、`www.bbb.com` - 推荐在 CDN/WAF 层配置防盗链白名单(如 `*.aaa.com`、`www.bbb.com`
- CORS 建议在 CDN/WAF/网关统一配置按域名的跨域放行策略,服务端默认不设置跨域响应头。
- 管理接口仅保留 `GET /api/seed`,不提供外部 POST如需更强控制可扩展签名 URL 校验(服务端或边缘验证令牌) - 管理接口仅保留 `GET /api/seed`,不提供外部 POST如需更强控制可扩展签名 URL 校验(服务端或边缘验证令牌)
## 部署建议 ## 部署建议

File diff suppressed because one or more lines are too long

View File

@@ -5,3 +5,6 @@
https://cdn.jsdmirror.com/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css https://cdn.jsdmirror.com/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css
https://cdn.jsdmirror.com/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js https://cdn.jsdmirror.com/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js
https://cdn.tailwindcss.com https://cdn.tailwindcss.com
# Element UI 2.15.13 样式(用于验证字体依赖自动抓取)
https://cdn.jsdmirror.com/npm/element-ui@2.15.13/lib/theme-chalk/index.css

133
server.js
View File

@@ -71,9 +71,11 @@ function resolveTargetPath(urlStr, contentType) {
let type let type
if (ext === '.css') folder = CSS_DIR if (ext === '.css') folder = CSS_DIR
else if (ext === '.js') folder = JS_DIR else if (ext === '.js') folder = JS_DIR
else if (['.woff', '.woff2', '.ttf', '.otf', '.eot', '.svg', '.png', '.jpg', '.jpeg', '.gif', '.webp'].includes(ext)) folder = CSS_DIR
else if (contentType) { else if (contentType) {
const ct = (contentType || '').split(';')[0].trim() const ct = (contentType || '').split(';')[0].trim()
if (ct === 'text/css') folder = CSS_DIR if (ct === 'text/css') folder = CSS_DIR
else if (ct.startsWith('font/') || ct.startsWith('image/')) folder = CSS_DIR
else folder = JS_DIR else folder = JS_DIR
} else { } else {
folder = JS_DIR folder = JS_DIR
@@ -112,6 +114,114 @@ function resolveTargetPath(urlStr, contentType) {
return { fullPath, folder: targetDir, filename: base, type } return { fullPath, folder: targetDir, filename: base, type }
} }
/**
 * Compute where a resource referenced by a CSS file is stored on disk.
 * The result always lives under CSS_DIR and mirrors the directory part of
 * the URL path; when the URL has no directory part the hostname is used as
 * the sub-directory instead.
 * @param {string} urlStr Absolute URL of the dependency
 * @returns {{fullPath:string, folder:string, filename:string}}
 * @throws {TypeError} When urlStr is not a valid absolute URL
 */
function resolveAssetPathForCss(urlStr) {
  const u = new URL(urlStr)

  // File name: last path segment, or "index" when the URL has none.
  let base = path.basename(u.pathname)
  if (!base || base === '/') base = 'index'

  // Directory part of the URL path, with ".." and empty segments removed.
  let subDir = path.dirname(u.pathname)
  if (subDir === '/' || subDir === '.') subDir = ''
  const safeParts = subDir
    .replace(/^\/+/, '')
    .replace(/\\+/g, '/')
    .split('/')
    .filter(p => p && p !== '..')
  const normalized = safeParts.join('/') || u.hostname

  const rootResolved = path.resolve(CSS_DIR)
  let targetDir = normalized ? path.join(CSS_DIR, normalized) : CSS_DIR
  let fullPath = path.join(targetDir, base)

  // Containment is checked on path segments rather than on a raw string
  // prefix, so a sibling such as "<root>-backup" is not mistaken for a
  // location inside the root.
  const rel = path.relative(rootResolved, path.resolve(fullPath))
  const escapesRoot = rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel)
  if (escapesRoot) {
    // Fall back to the root itself and keep folder consistent with fullPath.
    targetDir = rootResolved
    fullPath = path.join(rootResolved, base)
  }

  return { fullPath, folder: targetDir, filename: base }
}
/**
 * Collect the distinct targets of `url(...)` references in a CSS text.
 * Inline or self-contained references are ignored: `data:` URIs and
 * fragment-only values such as `url(#gradient)`.
 * Double-quoted, single-quoted and unquoted forms are recognised, and the
 * `url` keyword is matched case-insensitively.
 * @param {string} cssText CSS source text
 * @returns {string[]} Unique references in order of first appearance
 */
function extractCssUrls(cssText) {
  const text = cssText == null ? '' : String(cssText)
  const found = new Set()
  // One alternative per quoting style, so a quoted value may contain ")" or
  // the other quote character. A quoted value is consumed as a whole, which
  // keeps a url(...) nested inside a data: URI from being reported.
  const re = /url\(\s*(?:"([^"]*)"|'([^']*)'|([^"'()\s]+))\s*\)/gi
  let m
  while ((m = re.exec(text)) !== null) {
    const href = (m[1] || m[2] || m[3] || '').trim()
    if (!href) continue
    if (/^data:/i.test(href) || href.startsWith('#')) continue
    found.add(href)
  }
  return Array.from(found)
}
/**
 * After a CSS file has been saved, download the resources (fonts, images,
 * ...) it references through relative `url(...)` values and store them next
 * to it, mirroring the relative layout so the locally served CSS resolves
 * them without contacting the original host.
 *
 * Only relative references are mirrored. Absolute (`scheme:`),
 * protocol-relative (`//host/...`) and fragment-only (`#id`) references are
 * left alone, as is anything whose local path would fall outside CSS_DIR.
 * A failing dependency is logged and skipped; it never rejects the returned
 * promise.
 * @param {string} baseUrl Absolute URL the CSS was fetched from
 * @param {Buffer} cssBuf Raw CSS content
 * @returns {Promise<void>}
 * @throws {TypeError} When baseUrl is not a valid absolute URL
 */
async function fetchCssDependencies(baseUrl, cssBuf) {
  const refs = extractCssUrls(cssBuf.toString('utf8'))
  if (refs.length === 0) return

  const base = new URL(baseUrl)
  const rootDir = path.resolve(CSS_DIR)

  // Local directory matching the CSS file's own URL directory (hostname when
  // the URL has no directory part).
  let subDir = path.dirname(base.pathname)
  if (subDir === '/' || subDir === '.') subDir = ''
  const dirParts = subDir
    .replace(/^\/+/, '')
    .replace(/\\+/g, '/')
    .split('/')
    .filter(p => p && p !== '..')
  const targetDir = path.join(CSS_DIR, dirParts.join('/') || base.hostname)

  // Map a CSS reference to its local file path, or null when it must not be mirrored.
  function toLocalPath(ref) {
    const cleaned = ref.replace(/\\+/g, '/')
    if (cleaned.startsWith('#') || cleaned.startsWith('//')) return null
    if (/^[a-z][a-z0-9+.-]*:/i.test(cleaned)) return null
    // "?query" and "#hash" are not part of the file name; leaving them in
    // would create files the static server can never serve.
    // Root-relative references keep their earlier mapping: leading slashes
    // are dropped and the rest is placed under the CSS directory.
    const parts = cleaned.split(/[?#]/, 1)[0].replace(/^\/+/, '').split('/').filter(Boolean)
    if (parts.length === 0) return null
    // ".." segments are resolved (not dropped) so the file lands where the
    // served CSS will look for it, but never outside the cache root.
    const candidate = path.resolve(targetDir, ...parts)
    const rel = path.relative(rootDir, candidate)
    const outside = rel === '' || rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel)
    return outside ? null : candidate
  }

  // Alternative public CDNs for npm-style paths (".../npm/<pkg>@<ver>/<rest>",
  // optionally prefixed with "css/"); scoped packages are supported.
  function buildFallbacks(absUrl) {
    try {
      const u = new URL(absUrl)
      const m = u.pathname.match(/\/(?:css\/)?npm\/((?:@[^/]+\/)?[^/@]+@[^/]+)\/(.+)/)
      if (!m) return []
      const pkg = m[1]
      const rest = m[2]
      return [
        `https://cdn.jsdelivr.net/npm/${pkg}/${rest}`,
        `https://unpkg.com/${pkg}/${rest}`
      ].filter(candidate => candidate !== absUrl)
    } catch {
      return []
    }
  }

  // Several references may map to the same file (e.g. "x.eot" and
  // "x.eot?#iefix"); each local file is attempted once per run.
  const attempted = new Set()

  for (const ref of refs) {
    try {
      const localPath = toLocalPath(ref)
      if (!localPath || attempted.has(localPath)) continue
      attempted.add(localPath)
      if (fs.existsSync(localPath)) continue

      const primary = new URL(ref, baseUrl)
      primary.hash = ''
      const primaryUrl = primary.toString()

      // Try the original source first, then the fallback CDNs.
      const candidates = [primaryUrl, ...buildFallbacks(primaryUrl)]
      let saved = false
      let lastError = null
      for (const candidate of candidates) {
        try {
          const resp = await axios.get(candidate, { responseType: 'arraybuffer', timeout: 20000, headers: { 'User-Agent': 'AssetCache/1.0', 'Accept': '*/*' } })
          fs.mkdirSync(path.dirname(localPath), { recursive: true })
          fs.writeFileSync(localPath, Buffer.from(resp.data))
          saved = true
          break
        } catch (err) {
          lastError = err
        }
      }
      if (!saved) {
        console.warn(`[css-deps] failed to fetch ${primaryUrl}: ${lastError ? lastError.message : 'unknown error'}`)
      }
    } catch (err) {
      // A malformed reference must not abort the remaining dependencies or
      // the main CSS flow.
      console.warn(`[css-deps] skipped ${ref}: ${err.message}`)
    }
  }
}
/** /**
* 从URL提取并净化子目录与保存逻辑一致用于构造公开访问路径 * 从URL提取并净化子目录与保存逻辑一致用于构造公开访问路径
* @param {string} urlStr 远程资源URL * @param {string} urlStr 远程资源URL
@@ -165,6 +275,13 @@ async function fetchAndStore(urlStr) {
const pre = resolveTargetPath(urlStr, undefined) const pre = resolveTargetPath(urlStr, undefined)
if (fs.existsSync(pre.fullPath)) { if (fs.existsSync(pre.fullPath)) {
const stat = fs.statSync(pre.fullPath) const stat = fs.statSync(pre.fullPath)
// 若已存在且为CSS仍尝试解析并抓取依赖资源
if (pre.type === 'css') {
try {
const buf = fs.readFileSync(pre.fullPath)
await fetchCssDependencies(urlStr, buf)
} catch {}
}
return { url: urlStr, saved: pre.filename, size: stat.size, type: pre.type, skipped: true } return { url: urlStr, saved: pre.filename, size: stat.size, type: pre.type, skipped: true }
} }
@@ -181,7 +298,11 @@ async function fetchAndStore(urlStr) {
const { fullPath, folder, filename, type } = resolveTargetPath(urlStr, contentType) const { fullPath, folder, filename, type } = resolveTargetPath(urlStr, contentType)
if (!fs.existsSync(folder)) fs.mkdirSync(folder, { recursive: true }) if (!fs.existsSync(folder)) fs.mkdirSync(folder, { recursive: true })
fs.writeFileSync(fullPath, Buffer.from(response.data)) const buf = Buffer.from(response.data)
fs.writeFileSync(fullPath, buf)
if (type === 'css') {
await fetchCssDependencies(urlStr, buf)
}
const stat = fs.statSync(fullPath) const stat = fs.statSync(fullPath)
return { url: urlStr, saved: filename, size: stat.size, type, skipped: false } return { url: urlStr, saved: filename, size: stat.size, type, skipped: false }
} }
@@ -220,8 +341,14 @@ ensureCacheDirs()
registerPublicHomepage(app) registerPublicHomepage(app)
// 静态服务:/css 与 /js 直接映射到缓存目录 // 静态服务:/css 与 /js 直接映射到缓存目录
app.use('/css', express.static(CSS_DIR, { maxAge: '365d', immutable: true })) app.use('/css', express.static(CSS_DIR, {
app.use('/js', express.static(JS_DIR, { maxAge: '365d', immutable: true })) maxAge: '365d',
immutable: true
}))
app.use('/js', express.static(JS_DIR, {
maxAge: '365d',
immutable: true
}))
// 健康检查 // 健康检查
app.get('/health', (req, res) => { app.get('/health', (req, res) => {