# File-viewer header residue: 972 lines, 44 KiB, Python.
from __future__ import annotations
|
|
|
|
import re
|
|
from collections import defaultdict
|
|
from typing import Any, Dict, Iterable, List, Tuple
|
|
|
|
from intel.config import ENTITIES_DIR
|
|
from intel.models import AdvisoryRecord
|
|
from intel.utils import isoformat, load_all_json, now_utc, parse_dt, slugify, unique
|
|
|
|
|
|
# Keyword table used by infer_family(): the advisory text is lower-cased and
# scanned for these substrings, and the first family (in insertion order) with
# any hit is returned, so the order of the entries is significant.
FAMILY_KEYWORDS = {
    "xss": [
        "xss",
        "cross-site scripting",
        "content injection",
        "html injection",
        "dom xss",
    ],
    "sqli": ["sql injection", "sqli"],
    "authz-bypass": [
        "authorization bypass",
        "access control",
        "auth bypass",
        "permission bypass",
    ],
    "ssrf": ["ssrf", "server-side request forgery"],
    "file-upload": ["file upload", "upload bypass", "attachment"],
    "request-smuggling": ["request smuggling", "http desync"],
    "template-injection": ["template injection", "ssti"],
    "deserialization": ["deserialization", "serialization"],
    "proxy-boundary": ["proxy", "middleware", "header trust", "reverse proxy"],
    "plugin-extension": ["plugin", "extension", "module", "theme", "addon"],
    "session-token": ["token", "cookie", "session", "jwt"],
    "path-traversal": ["path traversal", "directory traversal"],
    "misconfiguration": [
        "misconfiguration",
        "default credentials",
        "admin panel",
        "debug",
    ],
}

# Entity types that denote add-on style components.
PLUGINISH_ENTITY_TYPES = {"plugin", "extension", "module", "theme"}

# Role label reported as ``required_role`` for each vulnerability family;
# "unknown" covers advisories that matched no family keyword.
FAMILY_TO_REQUIRED_ROLE = {
    "xss": "editor-or-admin",
    "sqli": "anonymous-or-low-privileged",
    "authz-bypass": "cross-tenant-or-low-privileged-user",
    "ssrf": "editor-or-admin",
    "file-upload": "authenticated-uploader",
    "request-smuggling": "edge-access",
    "template-injection": "template-editor-or-admin",
    "deserialization": "application-integrator",
    "proxy-boundary": "reverse-proxy-or-edge-client",
    "plugin-extension": "plugin-manager-or-admin",
    "session-token": "authenticated-user",
    "path-traversal": "anonymous-or-low-privileged",
    "misconfiguration": "operator-or-admin",
    "unknown": "unknown",
}
|
|
|
|
|
|
def _advisory_dict(item: AdvisoryRecord | Dict[str, Any]) -> Dict[str, Any]:
    """Return *item* as a plain dictionary.

    ``AdvisoryRecord`` instances are converted with ``to_dict()``. Any other
    value is shallow-copied through ``dict()``; a falsy value yields ``{}``.
    """
    if not isinstance(item, AdvisoryRecord):
        return dict(item or {})
    return item.to_dict()
|
|
|
|
|
|
def infer_family(advisory: Dict[str, Any]) -> str:
    """Classify *advisory* into a vulnerability family by keyword match.

    The title, summary, system id, package name, aliases and secure-code
    topics are joined into one lower-cased string. The first family in
    ``FAMILY_KEYWORDS`` (insertion order) that has any keyword occurring as a
    substring of that string is returned; ``"unknown"`` if none matches.
    """
    fragments = [
        advisory.get("title"),
        advisory.get("summary"),
        advisory.get("system_id"),
        advisory.get("package_name"),
        " ".join(advisory.get("aliases", []) or []),
        " ".join(advisory.get("secure_code_topics", []) or []),
    ]
    # Drop missing/empty fragments before joining.
    haystack = " ".join(fragment for fragment in fragments if fragment).lower()
    for family, keywords in FAMILY_KEYWORDS.items():
        for keyword in keywords:
            if keyword in haystack:
                return family
    return "unknown"
|
|
|
|
|
|
def _strip_package_version_suffix(value: str) -> str:
|
|
stripped = value.strip()
|
|
stripped = re.sub(r"/v\d+$", "", stripped)
|
|
return stripped
|
|
|
|
|
|
def _display_name(value: str, fallback: str) -> str:
|
|
candidate = (value or "").strip()
|
|
if not candidate:
|
|
return fallback
|
|
if candidate.startswith("github.com/"):
|
|
candidate = candidate.split("/", 1)[1]
|
|
candidate = candidate.replace("@", "").replace("/", " / ")
|
|
return candidate
|
|
|
|
|
|
def infer_entity_type(advisory: Dict[str, Any]) -> str:
    """Guess the entity type an advisory refers to.

    Add-on kinds are checked first, in the order theme, plugin, extension,
    module: a kind matches when its name occurs in the lower-cased package
    name or one of its text markers occurs in the lower-cased
    title/summary/package name. Otherwise the package name decides: a
    ``github.com/`` prefix or at least two slashes gives ``"repo"``, a single
    slash ``"package"``, and anything else ``"project"``.
    """
    pkg = (advisory.get("package_name") or "").lower()
    pieces = (
        advisory.get("title"),
        advisory.get("summary"),
        advisory.get("package_name"),
    )
    text = " ".join(piece for piece in pieces if piece).lower()

    # (kind, substrings looked up in the combined text); order is priority.
    addon_rules = (
        ("theme", (" theme",)),
        ("plugin", (" plugin", "plugins")),
        ("extension", (" extension",)),
        ("module", (" module",)),
    )
    for kind, text_markers in addon_rules:
        if kind in pkg or any(marker in text for marker in text_markers):
            return kind

    slash_count = pkg.count("/")
    if pkg.startswith("github.com/") or slash_count >= 2:
        return "repo"
    return "package" if slash_count else "project"
|
|
|
|
|
|
def advisory_scope_for_entity(entity_type: str) -> str:
    """Map an entity type to its advisory scope label.

    Plugin, extension, module, theme and repo map to themselves; package and
    project both map to ``"package"``; every other value maps to ``"core"``.
    """
    scope_by_entity_type = {
        "plugin": "plugin",
        "extension": "extension",
        "module": "module",
        "theme": "theme",
        "repo": "repo",
        "package": "package",
        "project": "package",
    }
    return scope_by_entity_type.get(entity_type, "core")
|
|
|
|
|
|
def _repo_url_from_package(package_name: str) -> str:
    """Derive a GitHub repository URL from a package name, or ``""``.

    After stripping a trailing ``/vN`` suffix, a name starting with
    ``github.com/`` is returned with ``https://`` prepended. A non-``@`` name
    of the form ``owner/repo`` (exactly one slash, both halves non-empty) is
    mapped to ``https://github.com/owner/repo``.
    """
    name = _strip_package_version_suffix(package_name)
    if name.startswith("github.com/"):
        return f"https://{name}"
    if name.startswith("@") or name.count("/") != 1:
        return ""
    owner, repo = name.split("/", 1)
    if not (owner and repo):
        return ""
    return f"https://github.com/{owner}/{repo}"
|
|
|
|
|
|
def _github_repo_from_url(url: str) -> str:
|
|
match = re.match(r"https://github\.com/([^/]+)/([^/#?]+)", (url or "").strip(), re.IGNORECASE)
|
|
if not match:
|
|
return ""
|
|
return f"https://github.com/{match.group(1)}/{match.group(2)}"
|
|
|
|
|
|
def _marketplace_slug(url: str) -> str:
|
|
parts = [part for part in re.split(r"[/?#]+", (url or "").strip()) if part]
|
|
if not parts:
|
|
return ""
|
|
return parts[-1]
|
|
|
|
|
|
def _package_registry_url(package_name: str) -> str:
    """Build a registry URL for *package_name*, or ``""`` if none applies.

    After stripping a trailing ``/vN`` suffix: names that start with ``@`` or
    contain no slash get an npmjs.com URL; names with exactly one slash that
    do not start with ``github.com/`` get a packagist.org URL.
    """
    name = _strip_package_version_suffix(package_name)
    if not name:
        return ""
    npm_style = name.startswith("@") or "/" not in name
    if npm_style:
        return f"https://www.npmjs.com/package/{name}"
    vendor_style = name.count("/") == 1 and not name.startswith("github.com/")
    if vendor_style:
        return f"https://packagist.org/packages/{name}"
    return ""
|
|
|
|
|
|
def _entity_id(system_id: str, entity_type: str, name: str) -> str:
    """Compose ``<system_id>--<entity_type>--<slugified name>``."""
    slug = slugify(name)
    return f"{system_id}--{entity_type}--{slug}"
|
|
|
|
|
|
def _pick_version_boundary(versions: Iterable[str], *, prefer_fixed: bool = False) -> str | None:
|
|
values = [str(value).strip() for value in versions if str(value).strip()]
|
|
if not values:
|
|
return None
|
|
direct = [value for value in values if re.search(r"\d", value)]
|
|
if direct:
|
|
return direct[0] if prefer_fixed else direct[-1]
|
|
return values[0]
|
|
|
|
|
|
def _version_confidence(affected: List[str], fixed: List[str]) -> Tuple[str, str, bool]:
|
|
if affected and fixed:
|
|
return "high", "", False
|
|
if affected or fixed:
|
|
return "medium", "official bulletin or mirrored source only exposed one side of the version boundary", True
|
|
return "low", "official bulletin or aggregated source did not expose explicit affected/fixed versions", True
|
|
|
|
|
|
def _entry_surface(family: str, scope: str) -> str:
|
|
mapping = {
|
|
"xss": "web-ui-render-path",
|
|
"sqli": "query-or-filter-parameter",
|
|
"authz-bypass": "privileged-route-or-object-reference",
|
|
"ssrf": "remote-fetch-or-webhook-endpoint",
|
|
"file-upload": "upload-or-import-surface",
|
|
"request-smuggling": "reverse-proxy-boundary",
|
|
"template-injection": "template-render-entry",
|
|
"deserialization": "serialized-input-boundary",
|
|
"proxy-boundary": "proxy-header-or-trust-boundary",
|
|
"plugin-extension": "extension-management-surface",
|
|
"session-token": "session-or-token-processing",
|
|
"path-traversal": "file-read-or-download-path",
|
|
"misconfiguration": "deployment-or-admin-surface",
|
|
}
|
|
return mapping.get(family, f"{scope}-surface")
|
|
|
|
|
|
def _request_paths(family: str, scope: str) -> List[str]:
|
|
mapping = {
|
|
"xss": ["/admin/editor", "/preview", "/rendered-content"],
|
|
"sqli": ["/search", "/filter", "/api/list"],
|
|
"authz-bypass": ["/admin/*", "/api/private/*", "/tenant/*"],
|
|
"ssrf": ["/webhook/test", "/remote-fetch", "/import-url"],
|
|
"file-upload": ["/upload", "/import", "/plugin/install"],
|
|
"request-smuggling": ["/ via reverse proxy", "front proxy -> app origin"],
|
|
"template-injection": ["/templates", "/email-preview", "/theme-editor"],
|
|
"deserialization": ["/api/import", "/queue/consumer", "/session/restore"],
|
|
"proxy-boundary": ["/middleware", "/x-forwarded-* trust path"],
|
|
"plugin-extension": ["/plugins", "/extensions", "/themes"],
|
|
"session-token": ["/login", "/callback", "/session"],
|
|
"path-traversal": ["/download", "/assets", "/attachment"],
|
|
"misconfiguration": ["/admin", "/debug", "/setup"],
|
|
}
|
|
return mapping.get(family, [f"/{scope}"])
|
|
|
|
|
|
def _input_shape(family: str) -> str:
|
|
mapping = {
|
|
"xss": "受控 HTML/Markdown/富文本输入,观察渲染上下文是否失去编码或净化。",
|
|
"sqli": "受控查询参数、排序字段或筛选值,观察是否突破预期查询边界。",
|
|
"authz-bypass": "使用低权限身份访问高权限对象或跨租户资源。",
|
|
"ssrf": "提交受控回环或哨兵 URL,验证协议、主机、IP 与重定向限制。",
|
|
"file-upload": "提交受控非执行样本,验证扩展名、MIME、落盘与执行权限。",
|
|
"request-smuggling": "构造受控冲突头部组合,仅验证代理与应用解析差异。",
|
|
"template-injection": "提交受控模板占位符,验证是否存在危险表达式求值。",
|
|
"deserialization": "提交受控序列化样本,验证类型恢复与危险对象实例化。",
|
|
"proxy-boundary": "提交受控代理头或来源头,验证信任边界和回源鉴权。",
|
|
"plugin-extension": "在扩展管理或扩展功能入口中提交受控配置/内容。",
|
|
"session-token": "使用短期测试令牌或会话,验证生命周期、绑定与失效逻辑。",
|
|
"path-traversal": "提交规范化路径片段,验证根目录限制与标准化处理。",
|
|
"misconfiguration": "检查默认入口、调试面板、弱默认项和暴露控制面。",
|
|
}
|
|
return mapping.get(family, "提交最小化、可审计、可回滚的受控输入。")
|
|
|
|
|
|
def _unsafe_behavior(family: str) -> str:
|
|
mapping = {
|
|
"xss": "输入在目标上下文执行或被浏览器解释为主动内容。",
|
|
"sqli": "响应、日志或 side effect 显示查询边界被打破。",
|
|
"authz-bypass": "低权限身份可访问本不应可见的数据或操作。",
|
|
"ssrf": "服务端向受控目标发起非预期请求。",
|
|
"file-upload": "上传样本被错误接受、可访问或位于可执行路径。",
|
|
"request-smuggling": "代理和应用对同一请求的边界解释不一致。",
|
|
"template-injection": "模板引擎对用户输入执行表达式求值。",
|
|
"deserialization": "反序列化恢复出危险类型或触发危险行为。",
|
|
"proxy-boundary": "仅凭代理头即可越过鉴权或来源控制。",
|
|
"plugin-extension": "扩展安装、配置或运行突破了信任边界。",
|
|
"session-token": "令牌或会话可被重放、固定或越权使用。",
|
|
"path-traversal": "可读取、列出或访问根目录之外资源。",
|
|
"misconfiguration": "默认设置暴露管理面、调试面或高权限动作。",
|
|
}
|
|
return mapping.get(family, "目标表现出超出设计边界的行为。")
|
|
|
|
|
|
def _evidence_points(family: str, advisory_scope: str) -> Tuple[List[str], List[str], List[str], List[str]]:
|
|
server = [
|
|
"应用日志中的命中路径、鉴权决策和异常栈",
|
|
"反向代理或边界层日志中的请求头、来源 IP 与路由决策",
|
|
]
|
|
browser = [
|
|
"基线截图与攻击后截图的 DOM/视觉差异",
|
|
"console、network 与 response metadata 中的异常信号",
|
|
]
|
|
db_fs = [
|
|
"数据库中新增/越权读取的测试数据",
|
|
"文件系统中新增上传样本、缓存条目或越权读取痕迹",
|
|
]
|
|
detection = [
|
|
"WAF / reverse proxy 异常日志、访问日志和告警",
|
|
"应用审计日志中的权限错误、重定向异常、模板渲染或上传落盘事件",
|
|
]
|
|
if family in {"request-smuggling", "proxy-boundary"}:
|
|
detection.append("上游代理与应用层对 Content-Length / Transfer-Encoding / forwarded headers 的解释差异")
|
|
if advisory_scope in {"plugin", "extension", "module", "theme"}:
|
|
server.append("插件/扩展管理日志、安装日志与版本清单")
|
|
db_fs.append("插件目录、主题目录或扩展配置表中的测试样本")
|
|
return server, browser, db_fs, detection
|
|
|
|
|
|
def _patch_validation_steps(family: str, advisory: Dict[str, Any]) -> List[str]:
|
|
patched_version = advisory.get("patched_version") or "修复版本"
|
|
version_assertion = advisory.get("affected_version_ranges") or advisory.get("affected_versions") or ["受影响版本区间"]
|
|
return [
|
|
f"确认目标版本从 `{', '.join(version_assertion[:3])}` 升级或回移到 `{patched_version}`。",
|
|
"保留同一组受控输入,在修复前后分别执行并比对响应、日志与浏览器证据。",
|
|
"确认修复后仅保留预期业务行为,不再触发越权、回显、异常渲染或错误请求。",
|
|
f"补充 `{family}` 族自动化回归,避免同类路径在插件、主题或代理链中回归。",
|
|
]
|
|
|
|
|
|
def build_workflow(advisory: Dict[str, Any], system: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble the validation-workflow dictionary for an advisory.

    The vulnerability family comes from an existing
    ``advisory["workflow"]["vuln_family"]`` when present, otherwise from
    ``infer_family``. A ``workflow`` key that is missing or ``None`` is
    treated as an empty mapping. The scope defaults to ``"core"``.

    Args:
        advisory: Advisory fields; ``workflow``, ``advisory_scope``,
            ``affected_version_ranges``, ``affected_versions``,
            ``canonical_id``, ``patched_version`` and
            ``version_resolution_needed`` are read here or by the helpers
            this function calls.
        system: Accepted for interface compatibility; not read here.

    Returns:
        A dictionary of workflow fields. ``review_state`` is
        ``"needs-version-gap-review"`` when ``version_resolution_needed`` is
        truthy and ``"ready"`` otherwise.
    """
    # `.get("workflow", {})` would still return None for an explicit null value.
    existing_workflow = advisory.get("workflow") or {}
    family = existing_workflow.get("vuln_family") or infer_family(advisory)
    scope = advisory.get("advisory_scope") or "core"
    required_role = FAMILY_TO_REQUIRED_ROLE.get(family, "unknown")
    affected_assertion = (
        advisory.get("affected_version_ranges")
        or advisory.get("affected_versions")
        or ["需要从公告、锁文件、版本页或关于页面人工确认版本命中"]
    )
    server, browser, db_fs, detection = _evidence_points(family, scope)
    return {
        "workflow_id": f"{advisory.get('canonical_id')}--workflow",
        "vuln_family": family,
        "entry_surface": _entry_surface(family, scope),
        "preconditions": [
            "仅在 lab-local、lab-public 或明确授权目标中执行。",
            f"确认目标命中版本断言: {', '.join(affected_assertion[:3])}",
            f"若对象属于 `{scope}`,先确认扩展/仓库/包已启用并处于受影响版本。",
        ],
        "required_role": required_role,
        "affected_version_assertion": affected_assertion,
        "trigger_vector": f"对 `{family}` 家族入口投递最小化、可审计、可回滚的受控输入,比较修复前后差异。",
        "request_or_ui_path": _request_paths(family, scope),
        "input_shape": _input_shape(family),
        "expected_unsafe_behavior": _unsafe_behavior(family),
        "server_evidence_points": server,
        "browser_evidence_points": browser,
        "db_or_fs_evidence_points": db_fs,
        "detection_signals": detection,
        "mitigation_summary": "优先升级到修复版本,并同时收紧输入校验、服务端鉴权、代理信任边界、扩展安装信任和审计日志。",
        "patch_validation_steps": _patch_validation_steps(family, advisory),
        "lab_safety_notes": [
            "只使用回环地址、哨兵目标、无害样本或可回滚测试数据。",
            "禁止造成持久破坏、越权下载真实数据或不可回滚 side effect。",
            "如需浏览器证据,保留 baseline / proof 两份快照以及 console / network 记录。",
        ],
        "review_state": "needs-version-gap-review" if advisory.get("version_resolution_needed") else "ready",
    }
|
|
|
|
|
|
def build_advisory_extensions(advisory: Dict[str, Any], system: Dict[str, Any]) -> Dict[str, Any]:
    """Compute the derived ("enriched") fields for one advisory.

    Produces entity references, the affected component, merged version
    ranges, version-confidence metadata and a ``workflow`` built from the
    advisory overlaid with those derived fields. The input dictionaries are
    not modified.

    NOTE(review): the three assignments under the sync-confidence check are
    assumed to all belong to that ``if``; the source this was reconstructed
    from had lost its indentation.
    """
    package_name = advisory.get("package_name") or ""
    entity_type = infer_entity_type(advisory)
    # Without a package name the advisory is attributed to the core system.
    advisory_scope = advisory_scope_for_entity(entity_type) if package_name else "core"
    root_system_id = advisory.get("system_id") or system.get("system_id")
    child_entity_id = _entity_id(root_system_id, entity_type, package_name) if package_name else ""
    is_core = advisory_scope == "core"

    entity_refs = [
        {
            "entity_id": root_system_id,
            "entity_type": "system",
            "relation": "root-system",
            "root_system_id": root_system_id,
            "official": True,
        }
    ]
    if child_entity_id:
        entity_refs.append(
            {
                "entity_id": child_entity_id,
                "entity_type": entity_type,
                "relation": "affected-component",
                "root_system_id": root_system_id,
                "official": is_core,
            }
        )

    affected = unique(
        list(advisory.get("affected_version_ranges") or [])
        + list(advisory.get("affected_versions") or [])
    )
    fixed = unique(
        list(advisory.get("fixed_version_ranges") or [])
        + list(advisory.get("fixed_versions") or [])
    )
    version_confidence, gap_reason, version_resolution_needed = _version_confidence(affected, fixed)

    affected_version_refs = unique(list(advisory.get("affected_version_refs") or []))
    fixed_version_refs = unique(list(advisory.get("fixed_version_refs") or []))
    patched_version_refs = unique(list(advisory.get("patched_version_refs") or []))
    version_sync_confidence = advisory.get("version_sync_confidence") or version_confidence

    has_version_refs = bool(affected_version_refs or fixed_version_refs or patched_version_refs)
    if version_sync_confidence in {"medium", "high"} and has_version_refs:
        # Synced version references take precedence over the range heuristic.
        version_confidence = version_sync_confidence
        gap_reason = advisory.get("version_gap_reason") or ""
        version_resolution_needed = False

    affected_components = [
        {
            "name": _display_name(package_name, system.get("display_name", root_system_id)),
            "entity_id": child_entity_id or root_system_id,
            "scope": advisory_scope,
            "package_name": package_name or None,
            "official": is_core,
        }
    ]

    version_sources = unique(
        list(advisory.get("version_evidence_sources") or [])
        + [advisory.get("official_source_url")]
        + list(advisory.get("secondary_source_urls") or [])
    )

    enriched = {
        "entity_refs": entity_refs,
        "affected_components": affected_components,
        "affected_version_ranges": affected,
        "fixed_version_ranges": fixed,
        "introduced_version": _pick_version_boundary(affected),
        "patched_version": advisory.get("patched_version") or _pick_version_boundary(fixed, prefer_fixed=True),
        "version_evidence_sources": version_sources,
        "affected_version_refs": affected_version_refs,
        "fixed_version_refs": fixed_version_refs,
        "patched_version_refs": patched_version_refs,
        "version_sync_confidence": version_sync_confidence,
        "advisory_scope": advisory_scope,
        "version_confidence": version_confidence,
        "version_gap_reason": gap_reason,
        "version_resolution_needed": version_resolution_needed,
    }
    # The workflow sees the advisory with the derived fields layered on top.
    overlaid_advisory = {**advisory, **enriched}
    enriched["workflow"] = build_workflow(overlaid_advisory, system)
    return enriched
|
|
|
|
|
|
def enrich_advisory_record(advisory: AdvisoryRecord, system: Dict[str, Any]) -> AdvisoryRecord:
    """Apply the derived advisory fields to *advisory* in place and return it.

    Every key produced by ``build_advisory_extensions`` is set as an
    attribute on the record. ``metadata`` is then replaced by a new dict
    holding the previous metadata plus four summary keys.
    """
    extensions = build_advisory_extensions(advisory.to_dict(), system)
    for field_name in extensions:
        setattr(advisory, field_name, extensions[field_name])

    previous_metadata = advisory.metadata or {}
    summary = {
        "entity_ref_count": len(advisory.entity_refs),
        "advisory_scope": advisory.advisory_scope,
        "version_confidence": advisory.version_confidence,
        "workflow_id": advisory.workflow.get("workflow_id"),
    }
    advisory.metadata = {**previous_metadata, **summary}
    return advisory
|
|
|
|
|
|
def _source_refs(system: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
refs: List[Dict[str, Any]] = []
|
|
for bucket in ("official_sources", "ecosystem_sources", "research_sources"):
|
|
for source in system.get(bucket, []) or []:
|
|
refs.append(
|
|
{
|
|
"name": source.get("name"),
|
|
"url": source.get("url"),
|
|
"kind": source.get("kind"),
|
|
"status": source.get("status"),
|
|
"bucket": bucket,
|
|
"official": bucket == "official_sources",
|
|
}
|
|
)
|
|
return refs
|
|
|
|
|
|
def _entity_payload(
|
|
*,
|
|
entity_id: str,
|
|
entity_type: str,
|
|
display_name: str,
|
|
parent_entity_id: str | None,
|
|
root_system_id: str,
|
|
category: str,
|
|
ecosystem: str,
|
|
official: bool,
|
|
status: str,
|
|
history_policy: str,
|
|
repo_url: str,
|
|
package_registry: str,
|
|
marketplace_url: str,
|
|
latest_version: str,
|
|
version_scheme: str,
|
|
source_refs: List[Dict[str, Any]],
|
|
) -> Dict[str, Any]:
|
|
return {
|
|
"entity_id": entity_id,
|
|
"entity_type": entity_type,
|
|
"display_name": display_name,
|
|
"parent_entity_id": parent_entity_id,
|
|
"root_system_id": root_system_id,
|
|
"category": category,
|
|
"ecosystem": ecosystem,
|
|
"official": official,
|
|
"status": status,
|
|
"history_policy": history_policy,
|
|
"repo_url": repo_url,
|
|
"package_registry": package_registry,
|
|
"marketplace_url": marketplace_url,
|
|
"latest_version": latest_version,
|
|
"version_scheme": version_scheme,
|
|
"latest_release_at": "",
|
|
"latest_release_url": "",
|
|
"version_source_refs": [],
|
|
"version_sync_status": "pending",
|
|
"security_version_count": 0,
|
|
"last_version_synced_at": "",
|
|
"latest_version_evidence": [],
|
|
"catalog_source": "",
|
|
"catalog_reason": "",
|
|
"auto_cataloged": False,
|
|
"last_discovered_at": "",
|
|
"last_synced_at": "",
|
|
"history_backfill_status": "pending",
|
|
"latest_sync_status": "pending",
|
|
"official_source_covered": False,
|
|
"advisory_count": 0,
|
|
"workflow_complete_advisory_count": 0,
|
|
"version_mapped_advisory_count": 0,
|
|
"first_advisory_at": "",
|
|
"latest_advisory_at": "",
|
|
"advisory_ids": [],
|
|
"source_refs": source_refs,
|
|
}
|
|
|
|
|
|
def _merge_source_refs(primary: List[Dict[str, Any]], secondary: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
merged: List[Dict[str, Any]] = []
|
|
seen = set()
|
|
for item in (primary or []) + (secondary or []):
|
|
if not isinstance(item, dict):
|
|
continue
|
|
key = (
|
|
item.get("name") or "",
|
|
item.get("url") or "",
|
|
item.get("kind") or "",
|
|
item.get("bucket") or "",
|
|
)
|
|
if key in seen:
|
|
continue
|
|
seen.add(key)
|
|
merged.append(item)
|
|
return merged
|
|
|
|
|
|
def _merge_entity_overlay(entity: Dict[str, Any], overlay: Dict[str, Any] | None) -> Dict[str, Any]:
    """Layer previously stored entity fields over a fresh entity record.

    With a falsy *overlay* the *entity* object itself is returned. Otherwise
    a shallow copy is made; for each overridable key present in *overlay*
    whose value is not ``None``, ``""``, ``[]`` or ``{}``, the overlay value
    replaces the entity value. ``source_refs`` is always the de-duplicated
    merge of both, with the entity's references first.
    """
    if not overlay:
        return entity

    overridable_keys = (
        "status",
        "history_policy",
        "repo_url",
        "package_registry",
        "marketplace_url",
        "latest_version",
        "version_scheme",
        "latest_release_at",
        "latest_release_url",
        "version_source_refs",
        "version_sync_status",
        "security_version_count",
        "last_version_synced_at",
        "latest_version_evidence",
        "catalog_source",
        "catalog_reason",
        "auto_cataloged",
        "last_discovered_at",
        "last_synced_at",
        "history_backfill_status",
        "latest_sync_status",
        "official_source_covered",
    )
    # Values treated as "no information"; 0 and False are not among them and
    # therefore do override.
    blank_values = (None, "", [], {})

    merged = dict(entity)
    for key in overridable_keys:
        if key in overlay:
            stored = overlay.get(key)
            if stored not in blank_values:
                merged[key] = stored
    merged["source_refs"] = _merge_source_refs(entity.get("source_refs", []), overlay.get("source_refs", []))
    return merged
|
|
|
|
|
|
def _update_entity_stats(entity: Dict[str, Any], advisories: List[Dict[str, Any]]) -> None:
    """Recompute the advisory-derived fields of *entity* in place.

    Updates the advisory counts and ids, the first/latest advisory
    timestamps (from ``published_at`` / ``updated_at`` values that
    ``parse_dt`` can parse), ``official_source_covered``,
    ``history_backfill_status``, ``latest_sync_status`` and, when it is still
    empty, ``latest_version`` (first advisory with a ``patched_version``).

    An advisory whose ``workflow`` is missing or ``None`` is counted as
    having no workflow instead of raising ``AttributeError``.

    NOTE(review): statement nesting was reconstructed from a copy that had
    lost its indentation. Confirm that the ``last_discovered_at`` /
    ``last_synced_at`` updates belong to ``if timestamps`` and that
    ``latest_sync_status`` is set unconditionally.
    """
    advisory_ids = [item.get("canonical_id") for item in advisories if item.get("canonical_id")]
    workflow_count = sum(
        1 for item in advisories if (item.get("workflow") or {}).get("workflow_id")
    )
    version_mapped_count = sum(
        1 for item in advisories if not item.get("version_resolution_needed")
    )

    timestamps = []
    for advisory in advisories:
        for field in ("published_at", "updated_at"):
            dt = parse_dt(advisory.get(field))
            if dt is not None:
                timestamps.append(dt)

    entity["advisory_count"] = len(advisories)
    entity["workflow_complete_advisory_count"] = workflow_count
    entity["version_mapped_advisory_count"] = version_mapped_count
    entity["advisory_ids"] = advisory_ids

    if timestamps:
        entity["first_advisory_at"] = isoformat(min(timestamps))
        entity["latest_advisory_at"] = isoformat(max(timestamps))
        entity["last_discovered_at"] = entity["latest_advisory_at"]
        entity["last_synced_at"] = entity["latest_advisory_at"]

    entity["official_source_covered"] = bool(
        entity.get("official_source_covered")
        or any(source.get("official") for source in entity.get("source_refs", []))
        or any(advisory.get("official_source_url") for advisory in advisories)
    )

    if entity["advisory_count"]:
        if entity["entity_type"] == "system" and entity.get("history_policy") == "history-full":
            entity["history_backfill_status"] = "complete"
        elif entity["advisory_count"] >= 2 and entity["version_mapped_advisory_count"] >= max(1, entity["advisory_count"] - 1):
            # At least two advisories and at most one without a version mapping.
            entity["history_backfill_status"] = "complete"
        else:
            entity["history_backfill_status"] = "seeded"

    entity["latest_sync_status"] = "green" if entity["official_source_covered"] and entity["advisory_count"] else "pending"

    if not entity.get("latest_version"):
        for advisory in advisories:
            patched = advisory.get("patched_version")
            if patched:
                entity["latest_version"] = patched
                break
|
|
|
|
|
|
def _candidate_from_source(system: Dict[str, Any], source: Dict[str, Any], known_repo_urls: set[str]) -> Dict[str, Any] | None:
    """Turn a catalog source into an entity-candidate record, if it qualifies.

    The source URL is classified as, in order: a GitHub repository, an npm
    package page, a Packagist package page, or a marketplace-style URL
    (containing ``/plugins/``, ``/themes/``, ``/extensions/``, ``/modules/``
    or ``/marketplace/``). Returns ``None`` when the URL fits none of these
    or when the repository is already in *known_repo_urls*. An
    ``entity_type_hint`` on the source overrides the per-branch default type.
    """
    url = (source.get("url") or "").strip()
    type_hint = source.get("entity_type_hint")
    entity_type = type_hint or "project"
    repo_url = _github_repo_from_url(url)
    package_registry = ""
    marketplace_url = ""
    display_name = ""
    # URL that is slugified into the candidate id.
    stable_url = repo_url
    marketplace_markers = ("/plugins/", "/themes/", "/extensions/", "/modules/", "/marketplace/")

    if repo_url:
        if repo_url in known_repo_urls:
            return None
        entity_type = type_hint or "repo"
        match = re.match(r"https://github\.com/([^/]+)/([^/#?]+)", repo_url, re.IGNORECASE)
        if match:
            display_name = f"{match.group(1)} / {match.group(2)}"
    elif "npmjs.com/package/" in url:
        entity_type = type_hint or "package"
        after_marker = url.split("/package/", 1)[1]
        package_name = after_marker.split("?", 1)[0].strip("/")
        package_registry = f"https://www.npmjs.com/package/{package_name}"
        display_name = package_name
        stable_url = package_registry
    elif "packagist.org/packages/" in url:
        entity_type = type_hint or "package"
        after_marker = url.split("/packages/", 1)[1]
        package_name = after_marker.split("?", 1)[0].strip("/")
        package_registry = f"https://packagist.org/packages/{package_name}"
        display_name = package_name.replace("/", " / ")
        stable_url = package_registry
    elif any(marker in url.lower() for marker in marketplace_markers):
        marketplace_url = url
        display_name = _marketplace_slug(url).replace("-", " ")
        stable_url = marketplace_url
    else:
        return None

    if not display_name:
        display_name = source.get("name") or system.get("display_name") or system.get("system_id")

    candidate_slug = slugify(stable_url or display_name)
    return {
        "candidate_id": f"{system.get('system_id')}--{entity_type}-candidate--{candidate_slug}",
        "root_system_id": system.get("system_id"),
        "display_name": display_name,
        "entity_type": entity_type,
        "status": "candidate",
        "reason": "source catalog exposed a stable security-related object that is not yet cataloged as an entity",
        "source": url,
        "source_name": source.get("name") or "",
        "source_confidence": source.get("confidence") or "unknown",
        "source_bucket": source.get("bucket_name") or "",
        "auto_catalog": bool(source.get("auto_catalog")),
        "repo_url": repo_url,
        "package_registry": package_registry,
        "marketplace_url": marketplace_url,
        "risk": "medium",
        "waiting_for": "确认是否应升级为 cataloged repo/plugin/package 实体并补齐安全相关版本与历史漏洞",
        "canonical_id": "",
    }
|
|
|
|
|
|
def build_entity_views(source_map: Dict[str, Any], advisories: List[AdvisoryRecord | Dict[str, Any]]) -> Dict[str, Any]:
    """Build the layered entity catalog, candidate backlog, work queues and reports.

    Steps, in order:

    1. Create one ``system`` entity per entry of ``source_map["systems"]`` that
       has a ``system_id``.
    2. Create child entities for every ``entity_refs`` item of every advisory
       (falling back to the advisory's own system) and group advisories by
       entity.
    3. Overlay previously stored entities read from ``ENTITIES_DIR`` and keep
       stored entities that were not regenerated in this run.
    4. Derive not-yet-cataloged candidates from each system's
       ``official_sources`` / ``ecosystem_sources``.
    5. Aggregate per-system summaries, global completeness counters, the four
       work queues and two Markdown reports.

    Args:
        source_map: Mapping with an optional ``"systems"`` list of system
            dicts; entries without a truthy ``system_id`` are ignored.
        advisories: Advisory records; each one is normalised through
            ``_advisory_dict`` before use.

    Returns:
        A dict with the keys ``generated_at``, ``entity_payloads`` (file name
        -> entity), ``entities`` (sorted by ``entity_id``),
        ``candidate_backlog``, ``completeness``, ``queues``,
        ``catalog_report_markdown`` and ``backlog_report_markdown``.
    """
    generated_at = isoformat(now_utc())
    systems = {item["system_id"]: item for item in source_map.get("systems", []) or [] if item.get("system_id")}
    advisory_rows = [_advisory_dict(item) for item in advisories]
    # Only stored entities that still belong to a known system are considered.
    existing_entities = {
        item.get("entity_id"): item
        for item in load_all_json(ENTITIES_DIR)
        if item.get("entity_id") and item.get("root_system_id") in systems
    }
    entities: Dict[str, Dict[str, Any]] = {}
    advisories_by_entity: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    system_summary: Dict[str, Dict[str, Any]] = {}

    def count_of(entity: Dict[str, Any], key: str) -> int:
        """Return a numeric counter field, treating a missing or ``None`` value as 0."""
        return entity.get(key) or 0

    def review_state_of(advisory: Dict[str, Any]) -> Any:
        """Return the advisory's workflow review state, tolerating ``workflow`` being ``None``."""
        return (advisory.get("workflow") or {}).get("review_state")

    def summary_for(root_system_id: Any) -> Dict[str, Any]:
        """Return the summary row of a system, creating a zeroed row on first use."""
        summary = system_summary.get(root_system_id)
        if summary is None:
            summary = {
                "system_id": root_system_id,
                "display_name": systems.get(root_system_id, {}).get("display_name", root_system_id),
                "cataloged_entity_total": 0,
                "child_entity_total": 0,
                "candidate_entity_total": 0,
                "workflow_complete_count": 0,
                "version_mapped_count": 0,
                "official_source_covered_count": 0,
                "history_full_complete_count": 0,
                "latest_green_count": 0,
                "version_gap_entity_count": 0,
                "workflow_gap_entity_count": 0,
                "plugin_total": 0,
                "entity_type_counts": {},
                "top_entities": [],
                "backlog_preview": [],
            }
            system_summary[root_system_id] = summary
        return summary

    # 1. One root entity per configured system.
    for system_id, system in systems.items():
        history_policy = system.get("tier") or "history-full"
        entities[system_id] = _merge_entity_overlay(
            _entity_payload(
                entity_id=system_id,
                entity_type="system",
                display_name=system.get("display_name", system_id),
                parent_entity_id=None,
                root_system_id=system_id,
                category=system.get("category", "unknown"),
                ecosystem=system.get("category", "unknown"),
                official=True,
                status="cataloged",
                history_policy=history_policy,
                repo_url="",
                package_registry="",
                marketplace_url="",
                latest_version="",
                version_scheme="vendor",
                source_refs=_source_refs(system),
            ),
            existing_entities.get(system_id),
        )

    # 2. Child entities referenced by advisories, plus advisory grouping.
    for advisory in advisory_rows:
        root_system_id = advisory.get("system_id")
        system = systems.get(root_system_id, {})
        refs = advisory.get("entity_refs") or [{"entity_id": root_system_id, "entity_type": "system"}]
        raw_package = advisory.get("package_name") or ""
        for ref in refs:
            entity_id = ref.get("entity_id")
            if not entity_id:
                continue
            if entity_id not in entities:
                entity_type = ref.get("entity_type") or infer_entity_type(advisory)
                if raw_package:
                    ecosystem = raw_package.split("/", 1)[0]
                else:
                    ecosystem = system.get("category", "unknown")
                entities[entity_id] = _merge_entity_overlay(
                    _entity_payload(
                        entity_id=entity_id,
                        entity_type=entity_type,
                        display_name=_display_name(raw_package or advisory.get("title") or entity_id, entity_id),
                        parent_entity_id=root_system_id,
                        root_system_id=root_system_id,
                        category=system.get("category", advisory.get("category", "unknown")),
                        ecosystem=ecosystem,
                        # Only project/repo entities discovered via advisories count as official.
                        official=entity_type in {"project", "repo"},
                        status="cataloged",
                        history_policy="history-full",
                        repo_url=_repo_url_from_package(raw_package),
                        package_registry=_package_registry_url(raw_package),
                        marketplace_url="",
                        latest_version=advisory.get("patched_version") or "",
                        version_scheme="semver-ish" if raw_package else "vendor",
                        source_refs=[],
                    ),
                    existing_entities.get(entity_id),
                )
            advisories_by_entity[entity_id].append(advisory)

    # 3. Refresh statistics, then re-apply the stored overlay on top of them.
    # NOTE(review): _update_entity_stats is presumably mutating the entity in
    # place (its return value is ignored here) - confirm against its definition.
    for entity_id, advisories_for_entity in advisories_by_entity.items():
        _update_entity_stats(entities[entity_id], advisories_for_entity)
        entities[entity_id] = _merge_entity_overlay(entities[entity_id], existing_entities.get(entity_id))

    # Stored entities that were not regenerated above are carried over untouched.
    for entity_id, item in existing_entities.items():
        if entity_id not in entities:
            entities[entity_id] = item

    # 4. Candidate backlog derived from the per-system source catalog.
    known_repo_urls = {entity.get("repo_url") for entity in entities.values() if entity.get("repo_url")}
    candidate_map: Dict[str, Dict[str, Any]] = {}
    for system in systems.values():
        for bucket in ("official_sources", "ecosystem_sources"):
            for source in system.get(bucket, []) or []:
                candidate = _candidate_from_source(system, source, known_repo_urls)
                if candidate:
                    candidate_map[candidate["candidate_id"]] = candidate
    candidate_backlog = sorted(candidate_map.values(), key=lambda item: (item["root_system_id"], item["display_name"]))

    # 5a. Per-system aggregation. Child entities are grouped in the same pass so
    # the ranking below does not rescan every entity once per system.
    children_by_system: Dict[Any, List[Dict[str, Any]]] = defaultdict(list)
    for entity in entities.values():
        root_id = entity["root_system_id"]
        entity_type = entity["entity_type"]
        summary = summary_for(root_id)
        summary["cataloged_entity_total"] += 1
        if entity_type != "system":
            summary["child_entity_total"] += 1
            children_by_system[root_id].append(entity)
        type_counts = summary["entity_type_counts"]
        type_counts[entity_type] = type_counts.get(entity_type, 0) + 1

        advisory_total = count_of(entity, "advisory_count")
        if advisory_total:
            # An entity with advisories is either fully covered or counted as a gap.
            if count_of(entity, "workflow_complete_advisory_count") >= advisory_total:
                summary["workflow_complete_count"] += 1
            else:
                summary["workflow_gap_entity_count"] += 1
            if count_of(entity, "version_mapped_advisory_count") >= advisory_total:
                summary["version_mapped_count"] += 1
            else:
                summary["version_gap_entity_count"] += 1
        if entity.get("official_source_covered"):
            summary["official_source_covered_count"] += 1
        if entity.get("history_backfill_status") == "complete":
            summary["history_full_complete_count"] += 1
        if entity.get("latest_sync_status") == "green":
            summary["latest_green_count"] += 1
        if entity_type in PLUGINISH_ENTITY_TYPES:
            summary["plugin_total"] += 1

    # Top five child entities per system: most advisories first, then type, then name.
    for system_id, summary in system_summary.items():
        ranked_entities = sorted(
            children_by_system.get(system_id, []),
            key=lambda item: (-(item.get("advisory_count") or 0), item["entity_type"], item["display_name"].lower()),
        )
        summary["top_entities"] = [
            {
                "entity_id": entity["entity_id"],
                "entity_type": entity["entity_type"],
                "display_name": entity["display_name"],
                "advisory_count": entity.get("advisory_count", 0),
                "history_backfill_status": entity.get("history_backfill_status"),
                "latest_sync_status": entity.get("latest_sync_status"),
            }
            for entity in ranked_entities[:5]
        ]

    # Candidate totals plus a preview of at most five candidates per system.
    for candidate in candidate_backlog:
        summary = summary_for(candidate["root_system_id"])
        summary["candidate_entity_total"] += 1
        preview = summary["backlog_preview"]
        if len(preview) < 5:
            preview.append(
                {
                    "candidate_id": candidate["candidate_id"],
                    "display_name": candidate["display_name"],
                    "entity_type": candidate["entity_type"],
                    "risk": candidate["risk"],
                }
            )

    # 5b. Global completeness counters and queue membership, each computed once.
    cataloged_entities = [entity for entity in entities.values() if entity.get("status") == "cataloged"]
    history_full_entities = [entity for entity in cataloged_entities if entity.get("history_policy") == "history-full"]
    history_pending = [entity for entity in history_full_entities if entity.get("history_backfill_status") != "complete"]
    history_full_complete_count = len(history_full_entities) - len(history_pending)
    latest_pending = [entity for entity in cataloged_entities if entity.get("latest_sync_status") != "green"]
    latest_green_count = len(cataloged_entities) - len(latest_pending)
    workflow_pending = [advisory for advisory in advisory_rows if review_state_of(advisory) != "ready"]

    workflow_complete_count = 0
    version_mapped_count = 0
    for entity in cataloged_entities:
        advisory_total = count_of(entity, "advisory_count")
        if not advisory_total:
            continue
        if count_of(entity, "workflow_complete_advisory_count") >= advisory_total:
            workflow_complete_count += 1
        if count_of(entity, "version_mapped_advisory_count") >= advisory_total:
            version_mapped_count += 1

    official_source_covered_count = sum(1 for entity in cataloged_entities if entity.get("official_source_covered"))
    plugin_history_full_count = sum(
        1
        for entity in cataloged_entities
        if entity.get("entity_type") in PLUGINISH_ENTITY_TYPES and entity.get("history_backfill_status") == "complete"
    )

    # Each queue reports its full size but lists at most 200 items.
    queue_payload = {
        "generated_at": generated_at,
        "discovery_queue": {
            "count": len(candidate_backlog),
            "items": candidate_backlog[:200],
        },
        "history_queue": {
            "count": len(history_pending),
            "items": [
                {
                    "entity_id": entity["entity_id"],
                    "display_name": entity["display_name"],
                    "root_system_id": entity["root_system_id"],
                    "history_policy": entity.get("history_policy"),
                    "history_backfill_status": entity.get("history_backfill_status"),
                }
                for entity in history_pending[:200]
            ],
        },
        "latest_queue": {
            "count": len(latest_pending),
            "items": [
                {
                    "entity_id": entity["entity_id"],
                    "display_name": entity["display_name"],
                    "root_system_id": entity["root_system_id"],
                    "latest_sync_status": entity.get("latest_sync_status"),
                    "last_synced_at": entity.get("last_synced_at"),
                }
                for entity in latest_pending[:200]
            ],
        },
        "workflow_queue": {
            "count": len(workflow_pending),
            "items": [
                {
                    "canonical_id": advisory.get("canonical_id"),
                    "system_id": advisory.get("system_id"),
                    "title": advisory.get("title"),
                    "review_state": review_state_of(advisory),
                    "version_resolution_needed": advisory.get("version_resolution_needed"),
                }
                for advisory in workflow_pending[:200]
            ],
        },
    }

    completeness = {
        "generated_at": generated_at,
        "cataloged_entity_total": len(cataloged_entities),
        "candidate_entity_total": len(candidate_backlog),
        "history_full_complete_count": history_full_complete_count,
        "latest_green_count": latest_green_count,
        "workflow_complete_count": workflow_complete_count,
        "version_mapped_count": version_mapped_count,
        "official_source_covered_count": official_source_covered_count,
        "plugin_history_full_count": plugin_history_full_count,
        # `or ""` keeps the sort from failing if an advisory without a system id
        # produced a summary keyed by None next to string ids.
        "systems": sorted(system_summary.values(), key=lambda item: item["system_id"] or ""),
        "queues": {
            key: value["count"] for key, value in queue_payload.items() if key.endswith("_queue")
        },
    }

    # 5c. Markdown reports (user-facing text is intentionally kept as authored).
    report_lines = [
        "# 分层实体覆盖与完整度报告",
        "",
        f"- 生成时间: `{generated_at}`",
        f"- 已编目实体: `{completeness['cataloged_entity_total']}`",
        f"- 待编目 backlog: `{completeness['candidate_entity_total']}`",
        f"- history-full 已完成: `{completeness['history_full_complete_count']}`",
        f"- latest green: `{completeness['latest_green_count']}`",
        f"- workflow 完整: `{completeness['workflow_complete_count']}`",
        f"- 版本映射完整: `{completeness['version_mapped_count']}`",
        f"- 官方源覆盖: `{completeness['official_source_covered_count']}`",
        f"- 插件 history-full 已完成: `{completeness['plugin_history_full_count']}`",
        "",
        "## 系统分层摘要",
        "",
        "| 系统 | cataloged entities | candidate backlog | workflow complete | version mapped | official covered | plugins |",
        "| --- | ---: | ---: | ---: | ---: | ---: | ---: |",
    ]
    report_lines.extend(
        f"| {item['system_id']} | {item['cataloged_entity_total']} | {item['candidate_entity_total']} | {item['workflow_complete_count']} | {item['version_mapped_count']} | {item['official_source_covered_count']} | {item['plugin_total']} |"
        for item in completeness["systems"]
    )

    backlog_lines = [
        "# 分层实体发现 Backlog",
        "",
        f"- 生成时间: `{generated_at}`",
        f"- 待编目数量: `{len(candidate_backlog)}`",
        "",
        "| candidate_id | root_system | entity_type | risk | reason | waiting_for | source |",
        "| --- | --- | --- | --- | --- | --- | --- |",
    ]
    if candidate_backlog:
        # The report table is capped at 500 rows; the returned backlog is not.
        backlog_lines.extend(
            f"| {item['candidate_id']} | {item['root_system_id']} | {item['entity_type']} | {item['risk']} | {item['reason']} | {item['waiting_for']} | {item.get('source') or '-'} |"
            for item in candidate_backlog[:500]
        )
    else:
        backlog_lines.append("| - | - | - | - | no backlog | - | - |")

    sorted_entities = sorted(entities.values(), key=lambda item: item["entity_id"])
    entity_payloads = {f"{entity['entity_id']}.json": entity for entity in sorted_entities}

    return {
        "generated_at": generated_at,
        "entity_payloads": entity_payloads,
        "entities": sorted_entities,
        "candidate_backlog": candidate_backlog,
        "completeness": completeness,
        "queues": queue_payload,
        "catalog_report_markdown": "\n".join(report_lines),
        "backlog_report_markdown": "\n".join(backlog_lines),
    }
|