feat(资源抓取): 实现CSS依赖资源的自动抓取与存储
新增对CSS文件中引用的字体、图片等资源的自动抓取功能,保持与源路径相同的层级结构;扩展resolveTargetPath以支持字体和图片类型,新增resolveAssetPathForCss处理依赖路径;添加fetchCssDependencies方法解析CSS中的url引用并下载相关资源;更新README文档说明新增的字体依赖处理机制。
This commit is contained in:
@@ -140,6 +140,12 @@
|
||||
- 本地保存:`cache/js/cdn.tailwindcss.com/index.js`
|
||||
- 对外访问:`/js/cdn.tailwindcss.com/index.js`
|
||||
|
||||
### 字体与依赖资源处理(CSS 自动抓取)
|
||||
|
||||
- 当抓取 `CSS` 文件时,会自动解析其中的 `url(...)` 引用,并尝试下载相对路径的依赖(如字体、图片等),统一保存到 `cache/css/...` 对应目录下,保持与源路径相同的层级结构。
|
||||
- 这样,形如 `@font-face { src: url(fonts/element-icons.woff) }` 的引用将会在本地落盘到 `cache/css/.../fonts/element-icons.woff`,并可通过 `/css/.../fonts/element-icons.woff` 访问,无需跨域请求第三方源。
|
||||
- 失败的依赖抓取会被静默跳过,不影响主 `CSS` 的可用性。
|
||||
|
||||
## 去重策略
|
||||
|
||||
- 目标路径存在则跳过抓取,响应中返回 `skipped: true`
|
||||
@@ -148,6 +154,7 @@
|
||||
## 安全与白名单建议
|
||||
|
||||
- 推荐在 CDN/WAF 层配置防盗链白名单(如 `*.aaa.com`、`www.bbb.com`)
|
||||
- CORS 建议在 CDN/WAF/网关统一配置按域名的跨域放行策略,服务端默认不设置跨域响应头。
|
||||
- 管理接口仅保留 `GET /api/seed`,不提供外部 POST;如需更强控制可扩展签名 URL 校验(服务端或边缘验证令牌)
|
||||
|
||||
## 部署建议
|
||||
|
||||
BIN
cache/css/npm/element-ui@2.15.13/lib/theme-chalk/fonts/element-icons.ttf
vendored
Normal file
BIN
cache/css/npm/element-ui@2.15.13/lib/theme-chalk/fonts/element-icons.ttf
vendored
Normal file
Binary file not shown.
BIN
cache/css/npm/element-ui@2.15.13/lib/theme-chalk/fonts/element-icons.woff
vendored
Normal file
BIN
cache/css/npm/element-ui@2.15.13/lib/theme-chalk/fonts/element-icons.woff
vendored
Normal file
Binary file not shown.
1
cache/css/npm/element-ui@2.15.13/lib/theme-chalk/index.css
vendored
Normal file
1
cache/css/npm/element-ui@2.15.13/lib/theme-chalk/index.css
vendored
Normal file
File diff suppressed because one or more lines are too long
3
seed.txt
3
seed.txt
@@ -5,3 +5,6 @@
|
||||
https://cdn.jsdmirror.com/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css
|
||||
https://cdn.jsdmirror.com/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js
|
||||
https://cdn.tailwindcss.com
|
||||
|
||||
# Element UI 2.15.13 样式(用于验证字体依赖自动抓取)
|
||||
https://cdn.jsdmirror.com/npm/element-ui@2.15.13/lib/theme-chalk/index.css
|
||||
|
||||
133
server.js
133
server.js
@@ -71,9 +71,11 @@ function resolveTargetPath(urlStr, contentType) {
|
||||
let type
|
||||
if (ext === '.css') folder = CSS_DIR
|
||||
else if (ext === '.js') folder = JS_DIR
|
||||
else if (['.woff', '.woff2', '.ttf', '.otf', '.eot', '.svg', '.png', '.jpg', '.jpeg', '.gif', '.webp'].includes(ext)) folder = CSS_DIR
|
||||
else if (contentType) {
|
||||
const ct = (contentType || '').split(';')[0].trim()
|
||||
if (ct === 'text/css') folder = CSS_DIR
|
||||
else if (ct.startsWith('font/') || ct.startsWith('image/')) folder = CSS_DIR
|
||||
else folder = JS_DIR
|
||||
} else {
|
||||
folder = JS_DIR
|
||||
@@ -112,6 +114,114 @@ function resolveTargetPath(urlStr, contentType) {
|
||||
return { fullPath, folder: targetDir, filename: base, type }
|
||||
}
|
||||
|
||||
/**
 * Compute where a resource referenced from a stylesheet is stored on disk.
 * The remote directory layout is mirrored below CSS_DIR; query string and
 * fragment are ignored because only the URL pathname is used.
 *
 * @param {string} urlStr Absolute URL of the dependency
 * @returns {{fullPath:string, folder:string, filename:string}}
 *   `fullPath` is `folder` joined with `filename`; `folder` is always the
 *   directory that actually contains `fullPath`.
 * @throws {TypeError} If `urlStr` is not a valid absolute URL
 */
function resolveAssetPathForCss(urlStr) {
  const u = new URL(urlStr)

  // A pathname that ends in "/" has no file name; fall back to "index".
  let filename = path.basename(u.pathname)
  if (!filename || filename === '/') filename = 'index'

  // Keep only real directory names: empty, "." and ".." segments are dropped
  // so the result can never climb above CSS_DIR.
  const dirParts = path.dirname(u.pathname)
    .replace(/\\+/g, '/')
    .split('/')
    .filter(part => part && part !== '.' && part !== '..')

  // Resources that live at the site root are grouped under the host name.
  const subDir = dirParts.join('/') || u.hostname

  const root = path.resolve(CSS_DIR)
  let folder = subDir ? path.join(CSS_DIR, subDir) : CSS_DIR
  let fullPath = path.join(folder, filename)

  // Containment check on path segments rather than on a raw string prefix:
  // a prefix test would also accept sibling directories such as "<root>-x".
  const rel = path.relative(root, path.resolve(fullPath))
  const escapesRoot = rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel)
  if (escapesRoot) {
    // Keep folder and fullPath in sync when falling back to the root.
    folder = root
    fullPath = path.join(root, filename)
  }

  return { fullPath, folder, filename }
}
|
||||
|
||||
/**
 * Collect the distinct `url(...)` references found in a stylesheet, in order
 * of first appearance.
 *
 * References that cannot be downloaded as separate files are left out:
 * inline `data:` URIs and fragment-only references such as `url(#filter)`.
 * The `url` keyword and the `data:` scheme are matched case-insensitively.
 * Quoted values may contain `)` or the other quote character.
 *
 * @param {string} cssText Stylesheet source text
 * @returns {string[]} Unique references exactly as written (trimmed)
 */
function extractCssUrls(cssText) {
  // Three alternatives: "double quoted", 'single quoted', or unquoted.
  const pattern = /url\(\s*(?:"([^"]*)"|'([^']*)'|([^"')]+))\s*\)/gi
  const found = new Set()
  let match
  while ((match = pattern.exec(cssText)) !== null) {
    // At most one of the three capture groups is populated per match.
    const href = (match[1] || match[2] || match[3] || '').trim()
    if (!href) continue
    // "#id" points into the current document; "data:" carries its payload inline.
    if (href.startsWith('#') || /^data:/i.test(href)) continue
    found.add(href)
  }
  return Array.from(found)
}
|
||||
|
||||
/**
 * After a stylesheet has been stored, download the resources it references
 * through `url(...)` (fonts, images, ...) so they can be served locally.
 *
 * Every reference is first resolved against `baseUrl`; the on-disk location
 * is then derived from the resolved URL via `resolveAssetPathForCss`. This
 * means that:
 *   - `../fonts/x.woff` lands where a browser resolving the same reference
 *     against the locally served stylesheet will look for it;
 *   - query strings and fragments (`?v=1`, `#iefix`) never end up in file names;
 *   - references to another origin are skipped, because the stylesheet keeps
 *     pointing at that origin and a local copy would never be requested.
 *
 * The function is best-effort: a reference that cannot be resolved,
 * downloaded or written is skipped without raising, so a broken dependency
 * never invalidates the stylesheet itself.
 *
 * @param {string} baseUrl Absolute URL the stylesheet was fetched from
 * @param {Buffer} cssBuf Raw stylesheet bytes (decoded as UTF-8)
 * @returns {Promise<void>}
 * @throws {TypeError} If the stylesheet has references and `baseUrl` is not a
 *   valid absolute URL
 */
async function fetchCssDependencies(baseUrl, cssBuf) {
  const refs = extractCssUrls(cssBuf.toString('utf8'))
  if (refs.length === 0) return

  const base = new URL(baseUrl)
  base.hash = ''

  // Alternative public CDNs for npm-style paths ("/npm/<pkg>@<ver>/<file>",
  // optionally prefixed with "/css"). Scoped packages ("@scope/pkg@ver") are
  // recognised as well.
  const buildFallbacks = (target) => {
    const m = target.pathname.match(/\/(?:css\/)?npm\/((?:@[^/]+\/)?[^/@]+@[^/]+)\/(.+)/)
    if (!m) return []
    const pkg = m[1]
    const rest = m[2]
    return [
      `https://cdn.jsdelivr.net/npm/${pkg}/${rest}`,
      `https://unpkg.com/${pkg}/${rest}`
    ]
  }

  for (const ref of refs) {
    // Fragment-only references point into the stylesheet itself.
    if (ref.startsWith('#')) continue

    let target
    try {
      target = new URL(ref, base)
    } catch {
      // Malformed reference: nothing to download.
      continue
    }
    // The fragment is never sent to the server and must not affect the file name.
    target.hash = ''

    // Only same-origin resources are mirrored (also excludes non-http schemes).
    if (target.origin !== base.origin) continue
    // A reference that resolves back to the stylesheet itself is not a dependency.
    if (target.href === base.href) continue

    const { fullPath, folder } = resolveAssetPathForCss(target.toString())
    if (fs.existsSync(fullPath)) continue

    // Try the original source first, then the fallback CDNs.
    const candidates = [target.toString(), ...buildFallbacks(target)]
    for (const candidate of candidates) {
      try {
        const resp = await axios.get(candidate, {
          responseType: 'arraybuffer',
          timeout: 20000,
          headers: { 'User-Agent': 'AssetCache/1.0', 'Accept': '*/*' }
        })
        // Create the directory only once there is something to store.
        fs.mkdirSync(folder, { recursive: true })
        fs.writeFileSync(fullPath, Buffer.from(resp.data))
        break
      } catch {
        // Deliberately ignored: move on to the next candidate. If every
        // candidate fails the dependency is simply left out.
      }
    }
  }
}
|
||||
|
||||
/**
|
||||
* 从URL提取并净化子目录(与保存逻辑一致),用于构造公开访问路径
|
||||
* @param {string} urlStr 远程资源URL
|
||||
@@ -165,6 +275,13 @@ async function fetchAndStore(urlStr) {
|
||||
const pre = resolveTargetPath(urlStr, undefined)
|
||||
if (fs.existsSync(pre.fullPath)) {
|
||||
const stat = fs.statSync(pre.fullPath)
|
||||
// 若已存在且为CSS,仍尝试解析并抓取依赖资源
|
||||
if (pre.type === 'css') {
|
||||
try {
|
||||
const buf = fs.readFileSync(pre.fullPath)
|
||||
await fetchCssDependencies(urlStr, buf)
|
||||
} catch {}
|
||||
}
|
||||
return { url: urlStr, saved: pre.filename, size: stat.size, type: pre.type, skipped: true }
|
||||
}
|
||||
|
||||
@@ -181,7 +298,11 @@ async function fetchAndStore(urlStr) {
|
||||
const { fullPath, folder, filename, type } = resolveTargetPath(urlStr, contentType)
|
||||
if (!fs.existsSync(folder)) fs.mkdirSync(folder, { recursive: true })
|
||||
|
||||
fs.writeFileSync(fullPath, Buffer.from(response.data))
|
||||
const buf = Buffer.from(response.data)
|
||||
fs.writeFileSync(fullPath, buf)
|
||||
if (type === 'css') {
|
||||
await fetchCssDependencies(urlStr, buf)
|
||||
}
|
||||
const stat = fs.statSync(fullPath)
|
||||
return { url: urlStr, saved: filename, size: stat.size, type, skipped: false }
|
||||
}
|
||||
@@ -220,8 +341,14 @@ ensureCacheDirs()
|
||||
registerPublicHomepage(app)
|
||||
|
||||
// 静态服务:/css 与 /js 直接映射到缓存目录
|
||||
app.use('/css', express.static(CSS_DIR, { maxAge: '365d', immutable: true }))
|
||||
app.use('/js', express.static(JS_DIR, { maxAge: '365d', immutable: true }))
|
||||
app.use('/css', express.static(CSS_DIR, {
|
||||
maxAge: '365d',
|
||||
immutable: true
|
||||
}))
|
||||
app.use('/js', express.static(JS_DIR, {
|
||||
maxAge: '365d',
|
||||
immutable: true
|
||||
}))
|
||||
|
||||
// 健康检查
|
||||
app.get('/health', (req, res) => {
|
||||
|
||||
Reference in New Issue
Block a user