实现分层实体漏洞知识库与实体级完整度监控

这个提交包含在:
hao
2026-03-19 17:57:45 -07:00
父节点 49fe46ab89
当前提交 1e81279e32
修改 2712 个文件,包含 434447 行新增和 2774 行删除

查看文件

@@ -12,6 +12,7 @@ THREAT_INTEL_ROOT = ROOT / "08-threat-intel"
REGISTRY_ROOT = THREAT_INTEL_ROOT / "registry"
ADVISORIES_DIR = REGISTRY_ROOT / "advisories"
SYSTEMS_DIR = REGISTRY_ROOT / "systems"
ENTITIES_DIR = REGISTRY_ROOT / "entities"
RUNS_DIR = REGISTRY_ROOT / "runs"
TRIAGE_DIR = REGISTRY_ROOT / "triage"
GENERATED_DIR = THREAT_INTEL_ROOT / "generated"
@@ -26,6 +27,11 @@ MONITOR_SUMMARY_PATH = GENERATED_DIR / "monitor-summary.json"
SOURCE_CATALOG_AUDIT_PATH = GENERATED_DIR / "source-catalog-audit.json"
SOURCE_CATALOG_AUDIT_MD_PATH = GENERATED_DIR / "source-catalog-audit.md"
RETIRED_SOURCES_PATH = GENERATED_DIR / "retired-sources.json"
ENTITY_COMPLETENESS_PATH = GENERATED_DIR / "entity-completeness.json"
ENTITY_BACKLOG_PATH = GENERATED_DIR / "entity-discovery-backlog.json"
ENTITY_QUEUES_PATH = GENERATED_DIR / "entity-queues.json"
ENTITY_CATALOG_REPORT_MD_PATH = GENERATED_DIR / "entity-catalog-report.md"
ENTITY_BACKLOG_REPORT_MD_PATH = GENERATED_DIR / "entity-discovery-backlog.md"
STATE_DIR = Path.home() / ".local" / "state" / "websafe-intel"
STATE_PATH = STATE_DIR / "state.json"

769
scripts/intel/entities.py 普通文件
查看文件

@@ -0,0 +1,769 @@
from __future__ import annotations
import re
from collections import defaultdict
from typing import Any, Dict, Iterable, List, Tuple
from intel.models import AdvisoryRecord
from intel.utils import isoformat, now_utc, parse_dt, slugify, unique
# Vulnerability family id -> lower-case keyword phrases.
# infer_family() walks this mapping in insertion order and returns the first
# family for which any phrase is contained in the advisory text, so earlier
# entries take precedence over later ones when several families match.
FAMILY_KEYWORDS = {
"xss": ["xss", "cross-site scripting", "content injection", "html injection", "dom xss"],
"sqli": ["sql injection", "sqli"],
"authz-bypass": ["authorization bypass", "access control", "auth bypass", "permission bypass"],
"ssrf": ["ssrf", "server-side request forgery"],
"file-upload": ["file upload", "upload bypass", "attachment"],
"request-smuggling": ["request smuggling", "http desync"],
"template-injection": ["template injection", "ssti"],
"deserialization": ["deserialization", "serialization"],
"proxy-boundary": ["proxy", "middleware", "header trust", "reverse proxy"],
"plugin-extension": ["plugin", "extension", "module", "theme", "addon"],
"session-token": ["token", "cookie", "session", "jwt"],
"path-traversal": ["path traversal", "directory traversal"],
"misconfiguration": ["misconfiguration", "default credentials", "admin panel", "debug"],
}
# Entity types that build_entity_views() counts under "plugin_total" and
# "plugin_history_full_count".
PLUGINISH_ENTITY_TYPES = {"plugin", "extension", "module", "theme"}
# Vulnerability family id -> role label that build_workflow() emits as
# "required_role"; families missing from this table resolve to "unknown" there.
FAMILY_TO_REQUIRED_ROLE = {
"xss": "editor-or-admin",
"sqli": "anonymous-or-low-privileged",
"authz-bypass": "cross-tenant-or-low-privileged-user",
"ssrf": "editor-or-admin",
"file-upload": "authenticated-uploader",
"request-smuggling": "edge-access",
"template-injection": "template-editor-or-admin",
"deserialization": "application-integrator",
"proxy-boundary": "reverse-proxy-or-edge-client",
"plugin-extension": "plugin-manager-or-admin",
"session-token": "authenticated-user",
"path-traversal": "anonymous-or-low-privileged",
"misconfiguration": "operator-or-admin",
"unknown": "unknown",
}
def _advisory_dict(item: AdvisoryRecord | Dict[str, Any]) -> Dict[str, Any]:
    """Return a plain-dict view of an advisory.

    ``AdvisoryRecord`` instances are converted through ``to_dict()``. Any other
    value is shallow-copied with ``dict()``; a falsy value yields an empty dict.
    """
    as_mapping = item.to_dict() if isinstance(item, AdvisoryRecord) else dict(item or {})
    return as_mapping
def infer_family(advisory: Dict[str, Any]) -> str:
    """Guess the vulnerability family of an advisory from its text fields.

    The title, summary, system id, package name, aliases and secure-code
    topics are joined into one lower-cased string. Families are tried in the
    insertion order of ``FAMILY_KEYWORDS`` and the first one with any keyword
    contained in that string wins. Matching is plain substring containment,
    so a short keyword also matches inside a longer word.

    Returns:
        The matching family id, or ``"unknown"`` when no keyword is found.
    """
    fragments = [
        advisory.get("title"),
        advisory.get("summary"),
        advisory.get("system_id"),
        advisory.get("package_name"),
        " ".join(advisory.get("aliases", []) or []),
        " ".join(advisory.get("secure_code_topics", []) or []),
    ]
    # Falsy fragments (None, "") are dropped before joining.
    haystack = " ".join(fragment for fragment in fragments if fragment).lower()
    for family, keywords in FAMILY_KEYWORDS.items():
        for keyword in keywords:
            if keyword in haystack:
                return family
    return "unknown"
def _strip_package_version_suffix(value: str) -> str:
stripped = value.strip()
stripped = re.sub(r"/v\d+$", "", stripped)
return stripped
def _display_name(value: str, fallback: str) -> str:
candidate = (value or "").strip()
if not candidate:
return fallback
if candidate.startswith("github.com/"):
candidate = candidate.split("/", 1)[1]
candidate = candidate.replace("@", "").replace("/", " / ")
return candidate
def infer_entity_type(advisory: Dict[str, Any]) -> str:
    """Classify the component an advisory is about.

    Checks run in a fixed order: theme, plugin, extension, module (keyword in
    the package name, or keyword preceded by a space in the combined
    title/summary/package text), then ``"repo"`` for ``github.com/`` names or
    names with two or more slashes, ``"package"`` for names with one slash,
    and finally ``"project"``.

    Returns:
        One of ``"theme"``, ``"plugin"``, ``"extension"``, ``"module"``,
        ``"repo"``, ``"package"`` or ``"project"``.
    """
    package_name = (advisory.get("package_name") or "").lower()
    fields = (
        advisory.get("title"),
        advisory.get("summary"),
        advisory.get("package_name"),
    )
    # The " keyword" checks below use the leading space as a word-start guard.
    # Prefix the text with a space so a keyword that opens the first field is
    # treated the same as one that opens any later (space-joined) field.
    text = " " + " ".join(filter(None, fields)).lower()
    if "theme" in package_name or " theme" in text:
        return "theme"
    if "plugin" in package_name or " plugin" in text or "plugins" in text:
        return "plugin"
    if "extension" in package_name or " extension" in text:
        return "extension"
    if "module" in package_name or " module" in text:
        return "module"
    if package_name.startswith("github.com/") or package_name.count("/") >= 2:
        return "repo"
    if "/" in package_name:
        return "package"
    return "project"
def advisory_scope_for_entity(entity_type: str) -> str:
    """Map an entity type to the advisory scope label.

    Plugin-like types and ``"repo"`` map to themselves, ``"package"`` and
    ``"project"`` both map to ``"package"``, and anything else is ``"core"``.
    """
    scope_by_type = {
        "plugin": "plugin",
        "extension": "extension",
        "module": "module",
        "theme": "theme",
        "repo": "repo",
        "package": "package",
        "project": "package",
    }
    return scope_by_type.get(entity_type, "core")
def _repo_url_from_package(package_name: str) -> str:
    """Derive a GitHub repository URL from a package name, when one is implied.

    The name is first normalised with ``_strip_package_version_suffix``.

    * ``github.com/owner/repo[/sub/path]`` becomes
      ``https://github.com/owner/repo``. Sub-package paths are cut off because
      a GitHub repository is always addressed by exactly owner and repo, and
      the result is compared against repository URLs elsewhere in this module.
    * An unscoped ``owner/repo`` name (exactly one slash, not starting with
      ``@``) becomes ``https://github.com/owner/repo``.

    Returns:
        The repository URL, or ``""`` when none can be derived.
    """
    package_name = _strip_package_version_suffix(package_name)
    if package_name.startswith("github.com/"):
        # Keep host + owner + repo only; shorter names pass through unchanged.
        return "https://" + "/".join(package_name.split("/")[:3])
    if package_name.count("/") == 1 and not package_name.startswith("@"):
        owner, repo = package_name.split("/", 1)
        if owner and repo:
            return f"https://github.com/{owner}/{repo}"
    return ""
def _package_registry_url(package_name: str) -> str:
    """Guess a package-registry page for a package name.

    After ``_strip_package_version_suffix`` normalisation:

    * scoped (``@...``) or slash-free names point at npmjs.com;
    * names with exactly one slash that are not ``github.com/...`` point at
      packagist.org;
    * everything else, including an empty name, yields ``""``.
    """
    normalized = _strip_package_version_suffix(package_name)
    if not normalized:
        return ""
    slash_count = normalized.count("/")
    if normalized.startswith("@") or slash_count == 0:
        return f"https://www.npmjs.com/package/{normalized}"
    if slash_count == 1 and not normalized.startswith("github.com/"):
        return f"https://packagist.org/packages/{normalized}"
    return ""
def _entity_id(system_id: str, entity_type: str, name: str) -> str:
    """Compose an entity id as ``<system_id>--<entity_type>--<slugified name>``."""
    name_slug = slugify(name)
    return f"{system_id}--{entity_type}--{name_slug}"
def _pick_version_boundary(versions: Iterable[str], *, prefer_fixed: bool = False) -> str | None:
values = [str(value).strip() for value in versions if str(value).strip()]
if not values:
return None
direct = [value for value in values if re.search(r"\d", value)]
if direct:
return direct[0] if prefer_fixed else direct[-1]
return values[0]
def _version_confidence(affected: List[str], fixed: List[str]) -> Tuple[str, str, bool]:
if affected and fixed:
return "high", "", False
if affected or fixed:
return "medium", "official bulletin or mirrored source only exposed one side of the version boundary", True
return "low", "official bulletin or aggregated source did not expose explicit affected/fixed versions", True
def _entry_surface(family: str, scope: str) -> str:
mapping = {
"xss": "web-ui-render-path",
"sqli": "query-or-filter-parameter",
"authz-bypass": "privileged-route-or-object-reference",
"ssrf": "remote-fetch-or-webhook-endpoint",
"file-upload": "upload-or-import-surface",
"request-smuggling": "reverse-proxy-boundary",
"template-injection": "template-render-entry",
"deserialization": "serialized-input-boundary",
"proxy-boundary": "proxy-header-or-trust-boundary",
"plugin-extension": "extension-management-surface",
"session-token": "session-or-token-processing",
"path-traversal": "file-read-or-download-path",
"misconfiguration": "deployment-or-admin-surface",
}
return mapping.get(family, f"{scope}-surface")
def _request_paths(family: str, scope: str) -> List[str]:
mapping = {
"xss": ["/admin/editor", "/preview", "/rendered-content"],
"sqli": ["/search", "/filter", "/api/list"],
"authz-bypass": ["/admin/*", "/api/private/*", "/tenant/*"],
"ssrf": ["/webhook/test", "/remote-fetch", "/import-url"],
"file-upload": ["/upload", "/import", "/plugin/install"],
"request-smuggling": ["/ via reverse proxy", "front proxy -> app origin"],
"template-injection": ["/templates", "/email-preview", "/theme-editor"],
"deserialization": ["/api/import", "/queue/consumer", "/session/restore"],
"proxy-boundary": ["/middleware", "/x-forwarded-* trust path"],
"plugin-extension": ["/plugins", "/extensions", "/themes"],
"session-token": ["/login", "/callback", "/session"],
"path-traversal": ["/download", "/assets", "/attachment"],
"misconfiguration": ["/admin", "/debug", "/setup"],
}
return mapping.get(family, [f"/{scope}"])
def _input_shape(family: str) -> str:
mapping = {
"xss": "受控 HTML/Markdown/富文本输入,观察渲染上下文是否失去编码或净化。",
"sqli": "受控查询参数、排序字段或筛选值,观察是否突破预期查询边界。",
"authz-bypass": "使用低权限身份访问高权限对象或跨租户资源。",
"ssrf": "提交受控回环或哨兵 URL,验证协议、主机、IP 与重定向限制。",
"file-upload": "提交受控非执行样本,验证扩展名、MIME、落盘与执行权限。",
"request-smuggling": "构造受控冲突头部组合,仅验证代理与应用解析差异。",
"template-injection": "提交受控模板占位符,验证是否存在危险表达式求值。",
"deserialization": "提交受控序列化样本,验证类型恢复与危险对象实例化。",
"proxy-boundary": "提交受控代理头或来源头,验证信任边界和回源鉴权。",
"plugin-extension": "在扩展管理或扩展功能入口中提交受控配置/内容。",
"session-token": "使用短期测试令牌或会话,验证生命周期、绑定与失效逻辑。",
"path-traversal": "提交规范化路径片段,验证根目录限制与标准化处理。",
"misconfiguration": "检查默认入口、调试面板、弱默认项和暴露控制面。",
}
return mapping.get(family, "提交最小化、可审计、可回滚的受控输入。")
def _unsafe_behavior(family: str) -> str:
mapping = {
"xss": "输入在目标上下文执行或被浏览器解释为主动内容。",
"sqli": "响应、日志或 side effect 显示查询边界被打破。",
"authz-bypass": "低权限身份可访问本不应可见的数据或操作。",
"ssrf": "服务端向受控目标发起非预期请求。",
"file-upload": "上传样本被错误接受、可访问或位于可执行路径。",
"request-smuggling": "代理和应用对同一请求的边界解释不一致。",
"template-injection": "模板引擎对用户输入执行表达式求值。",
"deserialization": "反序列化恢复出危险类型或触发危险行为。",
"proxy-boundary": "仅凭代理头即可越过鉴权或来源控制。",
"plugin-extension": "扩展安装、配置或运行突破了信任边界。",
"session-token": "令牌或会话可被重放、固定或越权使用。",
"path-traversal": "可读取、列出或访问根目录之外资源。",
"misconfiguration": "默认设置暴露管理面、调试面或高权限动作。",
}
return mapping.get(family, "目标表现出超出设计边界的行为。")
def _evidence_points(family: str, advisory_scope: str) -> Tuple[List[str], List[str], List[str], List[str]]:
server = [
"应用日志中的命中路径、鉴权决策和异常栈",
"反向代理或边界层日志中的请求头、来源 IP 与路由决策",
]
browser = [
"基线截图与攻击后截图的 DOM/视觉差异",
"console、network 与 response metadata 中的异常信号",
]
db_fs = [
"数据库中新增/越权读取的测试数据",
"文件系统中新增上传样本、缓存条目或越权读取痕迹",
]
detection = [
"WAF / reverse proxy 异常日志、访问日志和告警",
"应用审计日志中的权限错误、重定向异常、模板渲染或上传落盘事件",
]
if family in {"request-smuggling", "proxy-boundary"}:
detection.append("上游代理与应用层对 Content-Length / Transfer-Encoding / forwarded headers 的解释差异")
if advisory_scope in {"plugin", "extension", "module", "theme"}:
server.append("插件/扩展管理日志、安装日志与版本清单")
db_fs.append("插件目录、主题目录或扩展配置表中的测试样本")
return server, browser, db_fs, detection
def _patch_validation_steps(family: str, advisory: Dict[str, Any]) -> List[str]:
patched_version = advisory.get("patched_version") or "修复版本"
version_assertion = advisory.get("affected_version_ranges") or advisory.get("affected_versions") or ["受影响版本区间"]
return [
f"确认目标版本从 `{', '.join(version_assertion[:3])}` 升级或回移到 `{patched_version}`。",
"保留同一组受控输入,在修复前后分别执行并比对响应、日志与浏览器证据。",
"确认修复后仅保留预期业务行为,不再触发越权、回显、异常渲染或错误请求。",
f"补充 `{family}` 族自动化回归,避免同类路径在插件、主题或代理链中回归。",
]
def build_workflow(advisory: Dict[str, Any], system: Dict[str, Any]) -> Dict[str, Any]:
    """Build the lab verification workflow description for one advisory.

    Args:
        advisory: Advisory mapping. An existing ``workflow.vuln_family`` is
            reused; otherwise the family comes from ``infer_family``.
        system: Owning system mapping; accepted for signature parity and not
            read by this function.

    Returns:
        A workflow dict whose ``review_state`` is
        ``"needs-version-gap-review"`` when the advisory has
        ``version_resolution_needed`` set and ``"ready"`` otherwise.
    """
    # ``workflow`` may be present with an explicit None value (for example a
    # null in stored JSON); treat that the same as a missing key.
    existing_workflow = advisory.get("workflow") or {}
    family = existing_workflow.get("vuln_family") or infer_family(advisory)
    scope = advisory.get("advisory_scope") or "core"
    required_role = FAMILY_TO_REQUIRED_ROLE.get(family, "unknown")
    affected_assertion = (
        advisory.get("affected_version_ranges")
        or advisory.get("affected_versions")
        or ["需要从公告、锁文件、版本页或关于页面人工确认版本命中"]
    )
    # Stringify entries so a non-string version value cannot break str.join.
    assertion_preview = ", ".join(str(entry) for entry in affected_assertion[:3])
    server, browser, db_fs, detection = _evidence_points(family, scope)
    return {
        "workflow_id": f"{advisory.get('canonical_id')}--workflow",
        "vuln_family": family,
        "entry_surface": _entry_surface(family, scope),
        "preconditions": [
            "仅在 lab-local、lab-public 或明确授权目标中执行。",
            f"确认目标命中版本断言: {assertion_preview}",
            f"若对象属于 `{scope}`,先确认扩展/仓库/包已启用并处于受影响版本。",
        ],
        "required_role": required_role,
        "affected_version_assertion": affected_assertion,
        "trigger_vector": f"对 `{family}` 家族入口投递最小化、可审计、可回滚的受控输入,比较修复前后差异。",
        "request_or_ui_path": _request_paths(family, scope),
        "input_shape": _input_shape(family),
        "expected_unsafe_behavior": _unsafe_behavior(family),
        "server_evidence_points": server,
        "browser_evidence_points": browser,
        "db_or_fs_evidence_points": db_fs,
        "detection_signals": detection,
        "mitigation_summary": "优先升级到修复版本,并同时收紧输入校验、服务端鉴权、代理信任边界、扩展安装信任和审计日志。",
        "patch_validation_steps": _patch_validation_steps(family, advisory),
        "lab_safety_notes": [
            "只使用回环地址、哨兵目标、无害样本或可回滚测试数据。",
            "禁止造成持久破坏、越权下载真实数据或不可回滚 side effect。",
            "如需浏览器证据,保留 baseline / proof 两份快照以及 console / network 记录。",
        ],
        "review_state": "needs-version-gap-review" if advisory.get("version_resolution_needed") else "ready",
    }
def build_advisory_extensions(advisory: Dict[str, Any], system: Dict[str, Any]) -> Dict[str, Any]:
    """Compute the entity, version and workflow extension fields of an advisory.

    Args:
        advisory: Advisory mapping (as produced by ``AdvisoryRecord.to_dict``).
        system: Owning system mapping; supplies the fallback ``system_id`` and
            the ``display_name`` used when there is no package name.

    Returns:
        A dict with ``entity_refs``, ``affected_components``, version range and
        confidence fields, ``advisory_scope`` and a ``workflow`` built from the
        advisory merged with those fields. The input mapping is not modified.
    """
    entity_type = infer_entity_type(advisory)
    package_name = advisory.get("package_name") or ""
    # Without a package name the advisory is attributed to the system core.
    advisory_scope = advisory_scope_for_entity(entity_type) if package_name else "core"
    root_system_id = advisory.get("system_id") or system.get("system_id")
    child_entity_id = _entity_id(root_system_id, entity_type, package_name) if package_name else ""

    entity_refs = [
        {
            "entity_id": root_system_id,
            "entity_type": "system",
            "relation": "root-system",
            "root_system_id": root_system_id,
            "official": True,
        }
    ]
    if child_entity_id:
        entity_refs.append(
            {
                "entity_id": child_entity_id,
                "entity_type": entity_type,
                "relation": "affected-component",
                "root_system_id": root_system_id,
                "official": advisory_scope == "core",
            }
        )

    affected = list(advisory.get("affected_versions") or [])
    fixed = list(advisory.get("fixed_versions") or [])
    version_confidence, gap_reason, version_resolution_needed = _version_confidence(affected, fixed)

    affected_components = [
        {
            "name": _display_name(package_name, system.get("display_name", root_system_id)),
            "entity_id": child_entity_id or root_system_id,
            "scope": advisory_scope,
            "package_name": package_name or None,
            "official": advisory_scope == "core",
        }
    ]

    # Drop missing/empty URLs first so an absent official_source_url cannot
    # surface as a None entry in the evidence list.
    candidate_urls = [advisory.get("official_source_url")] + list(advisory.get("secondary_source_urls") or [])
    version_sources = unique([url for url in candidate_urls if url])

    enriched = {
        "entity_refs": entity_refs,
        "affected_components": affected_components,
        "affected_version_ranges": affected,
        "fixed_version_ranges": fixed,
        "introduced_version": _pick_version_boundary(affected),
        "patched_version": _pick_version_boundary(fixed, prefer_fixed=True),
        "version_evidence_sources": version_sources,
        "advisory_scope": advisory_scope,
        "version_confidence": version_confidence,
        "version_gap_reason": gap_reason,
        "version_resolution_needed": version_resolution_needed,
    }
    # The workflow builder reads the freshly computed scope and version fields.
    enriched["workflow"] = build_workflow({**advisory, **enriched}, system)
    return enriched
def enrich_advisory_record(advisory: AdvisoryRecord, system: Dict[str, Any]) -> AdvisoryRecord:
    """Attach the computed extension fields to ``advisory`` in place.

    Every key returned by ``build_advisory_extensions`` is set as an attribute
    on the record, and four summary values are merged into its ``metadata``.

    Returns:
        The same ``advisory`` object, mutated.
    """
    extensions = build_advisory_extensions(advisory.to_dict(), system)
    for attribute in extensions:
        setattr(advisory, attribute, extensions[attribute])

    merged_metadata = dict(advisory.metadata or {})
    merged_metadata.update(
        {
            "entity_ref_count": len(advisory.entity_refs),
            "advisory_scope": advisory.advisory_scope,
            "version_confidence": advisory.version_confidence,
            "workflow_id": advisory.workflow.get("workflow_id"),
        }
    )
    advisory.metadata = merged_metadata
    return advisory
def _source_refs(system: Dict[str, Any]) -> List[Dict[str, Any]]:
refs: List[Dict[str, Any]] = []
for bucket in ("official_sources", "ecosystem_sources", "research_sources"):
for source in system.get(bucket, []) or []:
refs.append(
{
"name": source.get("name"),
"url": source.get("url"),
"kind": source.get("kind"),
"status": source.get("status"),
"bucket": bucket,
"official": bucket == "official_sources",
}
)
return refs
def _entity_payload(
*,
entity_id: str,
entity_type: str,
display_name: str,
parent_entity_id: str | None,
root_system_id: str,
category: str,
ecosystem: str,
official: bool,
status: str,
history_policy: str,
repo_url: str,
package_registry: str,
marketplace_url: str,
latest_version: str,
version_scheme: str,
source_refs: List[Dict[str, Any]],
) -> Dict[str, Any]:
return {
"entity_id": entity_id,
"entity_type": entity_type,
"display_name": display_name,
"parent_entity_id": parent_entity_id,
"root_system_id": root_system_id,
"category": category,
"ecosystem": ecosystem,
"official": official,
"status": status,
"history_policy": history_policy,
"repo_url": repo_url,
"package_registry": package_registry,
"marketplace_url": marketplace_url,
"latest_version": latest_version,
"version_scheme": version_scheme,
"last_discovered_at": "",
"last_synced_at": "",
"history_backfill_status": "pending",
"latest_sync_status": "pending",
"official_source_covered": False,
"advisory_count": 0,
"workflow_complete_advisory_count": 0,
"version_mapped_advisory_count": 0,
"first_advisory_at": "",
"latest_advisory_at": "",
"advisory_ids": [],
"source_refs": source_refs,
}
def _update_entity_stats(entity: Dict[str, Any], advisories: List[Dict[str, Any]]) -> None:
    """Fill the tracking fields of ``entity`` from its advisories, in place.

    Updates the advisory counters and id list, the first/latest advisory
    timestamps (taken from ``published_at`` and ``updated_at``), the
    official-source flag, the history backfill status, the latest sync status
    and, when still empty, ``latest_version`` (first advisory that carries a
    ``patched_version``).

    Args:
        entity: Entity record as created by ``_entity_payload``.
        advisories: Advisory mappings that reference this entity.
    """
    advisory_ids = [item.get("canonical_id") for item in advisories if item.get("canonical_id")]
    # ``workflow`` can be present with an explicit None value; ``or {}`` keeps
    # such an advisory from raising AttributeError.
    workflow_count = sum(1 for item in advisories if (item.get("workflow") or {}).get("workflow_id"))
    version_mapped_count = sum(1 for item in advisories if not item.get("version_resolution_needed"))

    timestamps = []
    for advisory in advisories:
        for field_name in ("published_at", "updated_at"):
            dt = parse_dt(advisory.get(field_name))
            if dt is not None:
                timestamps.append(dt)

    entity["advisory_count"] = len(advisories)
    entity["workflow_complete_advisory_count"] = workflow_count
    entity["version_mapped_advisory_count"] = version_mapped_count
    entity["advisory_ids"] = advisory_ids

    if timestamps:
        entity["first_advisory_at"] = isoformat(min(timestamps))
        entity["latest_advisory_at"] = isoformat(max(timestamps))
        # Discovery and sync times are approximated by the newest advisory.
        entity["last_discovered_at"] = entity["latest_advisory_at"]
        entity["last_synced_at"] = entity["latest_advisory_at"]

    entity["official_source_covered"] = bool(
        entity.get("official_source_covered")
        or any(source.get("official") for source in entity.get("source_refs", []))
        or any(advisory.get("official_source_url") for advisory in advisories)
    )

    if entity["advisory_count"]:
        if entity["entity_type"] == "system" and entity.get("history_policy") == "history-full":
            entity["history_backfill_status"] = "complete"
        elif entity["advisory_count"] >= 2 and entity["version_mapped_advisory_count"] >= max(1, entity["advisory_count"] - 1):
            # At least two advisories, with at most one lacking version mapping.
            entity["history_backfill_status"] = "complete"
        else:
            entity["history_backfill_status"] = "seeded"

    entity["latest_sync_status"] = "green" if entity["official_source_covered"] and entity["advisory_count"] else "pending"

    if not entity.get("latest_version"):
        for advisory in advisories:
            patched = advisory.get("patched_version")
            if patched:
                entity["latest_version"] = patched
                break
def _candidate_from_source(system: Dict[str, Any], source: Dict[str, Any], known_repo_urls: set[str]) -> Dict[str, Any] | None:
    """Turn a catalog source into a repo backlog candidate, if applicable.

    Only URLs starting with ``https://github.com/<owner>/<repo>`` qualify.

    Returns:
        A candidate dict, or ``None`` when the URL is not such a GitHub URL or
        its ``https://github.com/<owner>/<repo>`` form is already in
        ``known_repo_urls``.
    """
    url = (source.get("url") or "").strip()
    match = re.match(r"https://github\.com/([^/]+)/([^/#?]+)", url)
    if match is None:
        return None
    owner, repo = match.group(1), match.group(2)
    repo_url = f"https://github.com/{owner}/{repo}"
    if repo_url in known_repo_urls:
        return None
    system_id = system.get("system_id")
    return {
        "candidate_id": f"{system_id}--repo-candidate--{slugify(repo_url)}",
        "root_system_id": system_id,
        "display_name": f"{owner} / {repo}",
        "entity_type": "repo",
        "status": "candidate",
        "reason": "source catalog exposed a repo-like URL that is not yet cataloged as an entity",
        "source": url,
        "risk": "medium",
        "waiting_for": "确认是否应升级为 cataloged repo/package 实体并补齐历史漏洞",
        "canonical_id": "",
    }
def _empty_system_summary(system_id: Any, systems: Dict[str, Any]) -> Dict[str, Any]:
    """Return a zeroed per-system roll-up row for the completeness report."""
    return {
        "system_id": system_id,
        "display_name": systems.get(system_id, {}).get("display_name", system_id),
        "cataloged_entity_total": 0,
        "candidate_entity_total": 0,
        "workflow_complete_count": 0,
        "version_mapped_count": 0,
        "official_source_covered_count": 0,
        "plugin_total": 0,
    }
def build_entity_views(source_map: Dict[str, Any], advisories: List[AdvisoryRecord | Dict[str, Any]]) -> Dict[str, Any]:
    """Build the entity catalog, discovery backlog, queues and reports.

    Steps:
      1. Create one ``system`` entity per system in ``source_map``.
      2. Create missing entities referenced by advisories and group the
         advisories per entity.
      3. Update per-entity statistics.
      4. Collect uncataloged GitHub repositories from the official and
         ecosystem sources as backlog candidates.
      5. Roll everything up into per-system summaries, global completeness
         counters, work queues and two Markdown reports.

    Args:
        source_map: Mapping with a ``systems`` list; entries without a
            ``system_id`` are ignored.
        advisories: ``AdvisoryRecord`` instances or advisory dicts.

    Returns:
        Dict with ``generated_at``, ``entity_payloads`` (filename -> entity),
        ``entities``, ``candidate_backlog``, ``completeness``, ``queues``,
        ``catalog_report_markdown`` and ``backlog_report_markdown``.
    """
    generated_at = isoformat(now_utc())
    systems = {item["system_id"]: item for item in source_map.get("systems", []) or [] if item.get("system_id")}
    advisory_rows = [_advisory_dict(item) for item in advisories]
    entities: Dict[str, Dict[str, Any]] = {}
    advisories_by_entity: Dict[str, List[Dict[str, Any]]] = defaultdict(list)

    # 1. Root system entities.
    for system_id, system in systems.items():
        entities[system_id] = _entity_payload(
            entity_id=system_id,
            entity_type="system",
            display_name=system.get("display_name", system_id),
            parent_entity_id=None,
            root_system_id=system_id,
            category=system.get("category", "unknown"),
            ecosystem=system.get("category", "unknown"),
            official=True,
            status="cataloged",
            history_policy=system.get("tier") or "history-full",
            repo_url="",
            package_registry="",
            marketplace_url="",
            latest_version="",
            version_scheme="vendor",
            source_refs=_source_refs(system),
        )

    # 2. Entities referenced by advisories.
    for advisory in advisory_rows:
        root_system_id = advisory.get("system_id")
        system = systems.get(root_system_id, {})
        package_name = advisory.get("package_name") or ""
        refs = advisory.get("entity_refs") or [{"entity_id": root_system_id, "entity_type": "system"}]
        for ref in refs:
            entity_id = ref.get("entity_id")
            if not entity_id:
                continue
            if entity_id not in entities:
                label_source = package_name or advisory.get("title") or entity_id
                entity_type = ref.get("entity_type") or infer_entity_type(advisory)
                entities[entity_id] = _entity_payload(
                    entity_id=entity_id,
                    entity_type=entity_type,
                    display_name=_display_name(label_source, entity_id),
                    # A root system missing from source_map must not become
                    # its own parent; roots have no parent, like the system
                    # entities created above.
                    parent_entity_id=None if entity_id == root_system_id else root_system_id,
                    root_system_id=root_system_id,
                    category=system.get("category", advisory.get("category", "unknown")),
                    ecosystem=package_name.split("/", 1)[0] if package_name else system.get("category", "unknown"),
                    official=entity_type in {"project", "repo"},
                    status="cataloged",
                    history_policy="history-full",
                    repo_url=_repo_url_from_package(package_name),
                    package_registry=_package_registry_url(package_name),
                    marketplace_url="",
                    latest_version=advisory.get("patched_version") or "",
                    version_scheme="semver-ish" if package_name else "vendor",
                    source_refs=[],
                )
            advisories_by_entity[entity_id].append(advisory)

    # 3. Per-entity statistics.
    for entity_id, advisories_for_entity in advisories_by_entity.items():
        _update_entity_stats(entities[entity_id], advisories_for_entity)

    # 4. Discovery backlog; keyed by candidate id so duplicates collapse.
    known_repo_urls = {entity.get("repo_url") for entity in entities.values() if entity.get("repo_url")}
    candidate_map: Dict[str, Dict[str, Any]] = {}
    for system in systems.values():
        for bucket in ("official_sources", "ecosystem_sources"):
            for source in system.get(bucket, []) or []:
                candidate = _candidate_from_source(system, source, known_repo_urls)
                if candidate:
                    candidate_map[candidate["candidate_id"]] = candidate
    candidate_backlog = sorted(candidate_map.values(), key=lambda item: (item["root_system_id"], item["display_name"]))

    # 5a. Per-system roll-up.
    system_summary: Dict[str, Dict[str, Any]] = {}
    for entity in entities.values():
        root_id = entity["root_system_id"]
        summary = system_summary.setdefault(root_id, _empty_system_summary(root_id, systems))
        summary["cataloged_entity_total"] += 1
        summary["workflow_complete_count"] += 1 if entity["advisory_count"] and entity["workflow_complete_advisory_count"] >= entity["advisory_count"] else 0
        summary["version_mapped_count"] += 1 if entity["advisory_count"] and entity["version_mapped_advisory_count"] >= entity["advisory_count"] else 0
        summary["official_source_covered_count"] += 1 if entity["official_source_covered"] else 0
        if entity["entity_type"] in PLUGINISH_ENTITY_TYPES:
            summary["plugin_total"] += 1
    for candidate in candidate_backlog:
        root_id = candidate["root_system_id"]
        system_summary.setdefault(root_id, _empty_system_summary(root_id, systems))["candidate_entity_total"] += 1

    # 5b. Global counters over cataloged entities.
    cataloged_entities = [entity for entity in entities.values() if entity.get("status") == "cataloged"]
    history_full_complete_count = len(
        [entity for entity in cataloged_entities if entity.get("history_policy") == "history-full" and entity.get("history_backfill_status") == "complete"]
    )
    latest_green_count = len([entity for entity in cataloged_entities if entity.get("latest_sync_status") == "green"])
    workflow_complete_count = len(
        [entity for entity in cataloged_entities if entity.get("advisory_count") and entity.get("workflow_complete_advisory_count") >= entity.get("advisory_count")]
    )
    version_mapped_count = len(
        [entity for entity in cataloged_entities if entity.get("advisory_count") and entity.get("version_mapped_advisory_count") >= entity.get("advisory_count")]
    )
    official_source_covered_count = len([entity for entity in cataloged_entities if entity.get("official_source_covered")])
    plugin_history_full_count = len(
        [
            entity
            for entity in cataloged_entities
            if entity.get("entity_type") in PLUGINISH_ENTITY_TYPES and entity.get("history_backfill_status") == "complete"
        ]
    )

    # 5c. Work queues. Each filter runs once; "count" is the full size while
    # "items" is capped at 200 entries.
    history_pending = [
        entity
        for entity in cataloged_entities
        if entity.get("history_policy") == "history-full" and entity.get("history_backfill_status") != "complete"
    ]
    latest_pending = [entity for entity in cataloged_entities if entity.get("latest_sync_status") != "green"]
    # ``workflow`` may be an explicit None; ``or {}`` keeps the lookup safe.
    workflow_pending = [
        advisory for advisory in advisory_rows if (advisory.get("workflow") or {}).get("review_state") != "ready"
    ]
    queue_payload = {
        "generated_at": generated_at,
        "discovery_queue": {
            "count": len(candidate_backlog),
            "items": candidate_backlog[:200],
        },
        "history_queue": {
            "count": len(history_pending),
            "items": [
                {
                    "entity_id": entity["entity_id"],
                    "display_name": entity["display_name"],
                    "root_system_id": entity["root_system_id"],
                    "history_policy": entity["history_policy"],
                    "history_backfill_status": entity["history_backfill_status"],
                }
                for entity in history_pending[:200]
            ],
        },
        "latest_queue": {
            "count": len(latest_pending),
            "items": [
                {
                    "entity_id": entity["entity_id"],
                    "display_name": entity["display_name"],
                    "root_system_id": entity["root_system_id"],
                    "latest_sync_status": entity["latest_sync_status"],
                    "last_synced_at": entity["last_synced_at"],
                }
                for entity in latest_pending[:200]
            ],
        },
        "workflow_queue": {
            "count": len(workflow_pending),
            "items": [
                {
                    "canonical_id": advisory.get("canonical_id"),
                    "system_id": advisory.get("system_id"),
                    "title": advisory.get("title"),
                    "review_state": (advisory.get("workflow") or {}).get("review_state"),
                    "version_resolution_needed": advisory.get("version_resolution_needed"),
                }
                for advisory in workflow_pending[:200]
            ],
        },
    }

    completeness = {
        "generated_at": generated_at,
        "cataloged_entity_total": len(cataloged_entities),
        "candidate_entity_total": len(candidate_backlog),
        "history_full_complete_count": history_full_complete_count,
        "latest_green_count": latest_green_count,
        "workflow_complete_count": workflow_complete_count,
        "version_mapped_count": version_mapped_count,
        "official_source_covered_count": official_source_covered_count,
        "plugin_history_full_count": plugin_history_full_count,
        "systems": sorted(system_summary.values(), key=lambda item: item["system_id"]),
        "queues": {
            key: value["count"] for key, value in queue_payload.items() if key.endswith("_queue")
        },
    }

    # 5d. Markdown reports.
    report_lines = [
        "# 分层实体覆盖与完整度报告",
        "",
        f"- 生成时间: `{generated_at}`",
        f"- 已编目实体: `{completeness['cataloged_entity_total']}`",
        f"- 待编目 backlog: `{completeness['candidate_entity_total']}`",
        f"- history-full 已完成: `{completeness['history_full_complete_count']}`",
        f"- latest green: `{completeness['latest_green_count']}`",
        f"- workflow 完整: `{completeness['workflow_complete_count']}`",
        f"- 版本映射完整: `{completeness['version_mapped_count']}`",
        f"- 官方源覆盖: `{completeness['official_source_covered_count']}`",
        f"- 插件 history-full 已完成: `{completeness['plugin_history_full_count']}`",
        "",
        "## 系统分层摘要",
        "",
        "| 系统 | cataloged entities | candidate backlog | workflow complete | version mapped | official covered | plugins |",
        "| --- | ---: | ---: | ---: | ---: | ---: | ---: |",
    ]
    for item in completeness["systems"]:
        report_lines.append(
            f"| {item['system_id']} | {item['cataloged_entity_total']} | {item['candidate_entity_total']} | {item['workflow_complete_count']} | {item['version_mapped_count']} | {item['official_source_covered_count']} | {item['plugin_total']} |"
        )

    backlog_lines = [
        "# 分层实体发现 Backlog",
        "",
        f"- 生成时间: `{generated_at}`",
        f"- 待编目数量: `{len(candidate_backlog)}`",
        "",
        "| candidate_id | root_system | entity_type | risk | reason | waiting_for | source |",
        "| --- | --- | --- | --- | --- | --- | --- |",
    ]
    if candidate_backlog:
        for item in candidate_backlog[:500]:
            backlog_lines.append(
                f"| {item['candidate_id']} | {item['root_system_id']} | {item['entity_type']} | {item['risk']} | {item['reason']} | {item['waiting_for']} | {item.get('source') or '-'} |"
            )
    else:
        backlog_lines.append("| - | - | - | - | no backlog | - | - |")

    ordered_entities = sorted(entities.values(), key=lambda item: item["entity_id"])
    entity_payloads = {f"{entity['entity_id']}.json": entity for entity in ordered_entities}
    return {
        "generated_at": generated_at,
        "entity_payloads": entity_payloads,
        "entities": ordered_entities,
        "candidate_backlog": candidate_backlog,
        "completeness": completeness,
        "queues": queue_payload,
        "catalog_report_markdown": "\n".join(report_lines),
        "backlog_report_markdown": "\n".join(backlog_lines),
    }

查看文件

@@ -120,6 +120,7 @@ def _load_existing_selection(
if len(source_map["systems"]) != len(full_source_map["systems"]):
advisories, triage = _merge_preserved_records(source_map, advisories, triage)
render_map = full_source_map
advisories = route_advisories(render_map, advisories)
return render_map, advisories, triage

查看文件

@@ -64,6 +64,18 @@ class AdvisoryRecord:
secure_code_topics: List[str]
status: str
triage_reasons: List[str] = field(default_factory=list)
entity_refs: List[Dict[str, Any]] = field(default_factory=list)
affected_components: List[Dict[str, Any]] = field(default_factory=list)
affected_version_ranges: List[str] = field(default_factory=list)
fixed_version_ranges: List[str] = field(default_factory=list)
introduced_version: Optional[str] = None
patched_version: Optional[str] = None
version_evidence_sources: List[str] = field(default_factory=list)
advisory_scope: str = "core"
version_confidence: str = "low"
version_gap_reason: str = ""
version_resolution_needed: bool = False
workflow: Dict[str, Any] = field(default_factory=dict)
verification_status: str = "triage-manual"
verification_mode: str = "synthetic"
last_verified_at: Optional[str] = None

查看文件

@@ -6,6 +6,7 @@ from typing import Any, Dict, List
from intel.config import (
ALERTS_PATH,
ENTITY_COMPLETENESS_PATH,
MACHINE_READABLE_SOURCE_KINDS,
MONITORING_DIR,
MONITOR_SUMMARY_PATH,
@@ -363,6 +364,7 @@ def write_monitoring_state(
) -> Dict[str, Any]:
open_alerts = [item for item in alerts if item.get("status") == "open"]
generated_at = source_health.get("generated_at") or isoformat(now_utc())
entity_completeness = read_json(ENTITY_COMPLETENESS_PATH, default={}) or {}
summary = {
"generated_at": generated_at,
"active_source_count": source_health.get("active_source_count", 0),
@@ -387,6 +389,14 @@ def write_monitoring_state(
"error_count": len(validation_errors),
"errors": validation_errors,
},
"entity_coverage": {
"cataloged_entity_total": entity_completeness.get("cataloged_entity_total", 0),
"candidate_entity_total": entity_completeness.get("candidate_entity_total", 0),
"history_full_complete_count": entity_completeness.get("history_full_complete_count", 0),
"workflow_complete_count": entity_completeness.get("workflow_complete_count", 0),
"version_mapped_count": entity_completeness.get("version_mapped_count", 0),
"official_source_covered_count": entity_completeness.get("official_source_covered_count", 0),
},
}
snapshot = {
"generated_at": generated_at,

查看文件

@@ -5,7 +5,22 @@ from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List
from intel.config import FRAMEWORK_ROOT, GENERATED_DIR, REGISTRY_ROOT, ROOT, SECURE_CODE_ROOT, SYSTEMS_DIR, TRIAGE_DIR
from intel.config import (
ENTITY_BACKLOG_PATH,
ENTITY_BACKLOG_REPORT_MD_PATH,
ENTITY_CATALOG_REPORT_MD_PATH,
ENTITY_COMPLETENESS_PATH,
ENTITY_QUEUES_PATH,
ENTITIES_DIR,
FRAMEWORK_ROOT,
GENERATED_DIR,
REGISTRY_ROOT,
ROOT,
SECURE_CODE_ROOT,
SYSTEMS_DIR,
TRIAGE_DIR,
)
from intel.entities import build_entity_views
from intel.models import AdvisoryRecord
from intel.utils import ensure_dir, isoformat, now_utc, write_json, write_text
from lab.render import render_dashboard as render_lab_dashboard
@@ -218,6 +233,25 @@ def _sync_selected_json_dir(path: Path, payloads: Dict[str, Any], selected_syste
write_json(path / filename, payload)
def _sync_selected_entity_json_dir(path: Path, payloads: Dict[str, Any], selected_system_ids: set[str]) -> None:
    """Sync entity JSON files under *path* for the selected systems only.

    A file is considered owned by a selected system when its stem equals the
    system id or starts with ``"<system_id>--"``. Owned files whose name is not
    a key of *payloads* are deleted; files owned by other systems are left
    untouched. Afterwards every entry of *payloads* is written to
    ``path / filename``.

    Args:
        path: Directory holding the entity ``*.json`` files; created if missing.
        payloads: Mapping of file name to the payload written for that file.
        selected_system_ids: System ids whose stale files may be pruned.
    """
    ensure_dir(path)
    # str.startswith accepts a tuple, so build the ownership prefixes once
    # instead of formatting them again for every file in the directory.
    owned_prefixes = tuple(f"{system_id}--" for system_id in selected_system_ids)
    for file_path in path.glob("*.json"):
        stem = file_path.stem
        if stem not in selected_system_ids and not stem.startswith(owned_prefixes):
            continue
        if file_path.name in payloads:
            continue
        # The decision to prune depends only on the file name, so the content is
        # not read: an empty or non-UTF-8 stale file is removed like any other
        # instead of being kept forever or aborting the sync with a decode error.
        try:
            file_path.unlink()
        except FileNotFoundError:
            # The file vanished between glob() and unlink(); nothing left to prune.
            continue
    for filename, payload in payloads.items():
        write_json(path / filename, payload)
def render_system_scaffolding(
source_map: Dict[str, Any],
advisories: List[AdvisoryRecord],
@@ -386,6 +420,9 @@ def render_case_pages(advisories: List[AdvisoryRecord], selected_system_ids: set
lines.append("fixed_versions:")
for version in (item.fixed_versions or [])[:20]:
lines.append(f' - "{version}"')
lines.append("entity_refs:")
for ref in item.entity_refs or []:
lines.append(f' - "{ref.get("entity_id", "")}:{ref.get("entity_type", "")}:{ref.get("relation", "")}"')
lines.append("secure_code_topics:")
for topic in item.secure_code_topics or []:
lines.append(f' - "{topic}"')
@@ -415,6 +452,26 @@ def render_case_pages(advisories: List[AdvisoryRecord], selected_system_ids: set
f"- 影响版本: `{', '.join((item.affected_versions or [])[:10]) or 'unknown'}`",
f"- 修复版本: `{', '.join((item.fixed_versions or [])[:10]) or 'unknown'}`",
"",
"## 对象与版本映射",
"",
f"- Advisory Scope: `{item.advisory_scope or 'core'}`",
f"- 影响对象: `{', '.join(component.get('name', '-') for component in (item.affected_components or [])) or 'unknown'}`",
f"- Entity Refs: `{', '.join(ref.get('entity_id', '-') for ref in (item.entity_refs or [])) or 'unknown'}`",
f"- 版本置信度: `{item.version_confidence or 'unknown'}`",
f"- 版本缺口: `{item.version_gap_reason or '-'}`",
f"- 版本证据源: `{', '.join((item.version_evidence_sources or [])[:5]) or 'unknown'}`",
"",
"## 受控验证流程",
"",
f"- Workflow ID: `{item.workflow.get('workflow_id') or '-'}`",
f"- 漏洞家族: `{item.workflow.get('vuln_family') or '-'}`",
f"- 入口面: `{item.workflow.get('entry_surface') or '-'}`",
f"- 需要角色: `{item.workflow.get('required_role') or '-'}`",
f"- 触发向量: {item.workflow.get('trigger_vector') or '-'}",
f"- 请求/页面入口: `{', '.join(item.workflow.get('request_or_ui_path', [])[:5]) or '-'}`",
f"- 输入形态: {item.workflow.get('input_shape') or '-'}",
f"- 预期不安全行为: {item.workflow.get('expected_unsafe_behavior') or '-'}",
"",
"## 其他来源",
"",
]
@@ -427,6 +484,28 @@ def render_case_pages(advisories: List[AdvisoryRecord], selected_system_ids: set
lines.extend(
[
"",
"## 证据点与补丁验证",
"",
]
)
for section, values in [
("服务端证据点", item.workflow.get("server_evidence_points", [])),
("浏览器证据点", item.workflow.get("browser_evidence_points", [])),
("数据库/文件系统证据点", item.workflow.get("db_or_fs_evidence_points", [])),
("检测信号", item.workflow.get("detection_signals", [])),
("补丁验证步骤", item.workflow.get("patch_validation_steps", [])),
("实验安全备注", item.workflow.get("lab_safety_notes", [])),
]:
lines.append(f"### {section}")
lines.append("")
if values:
for value in values:
lines.append(f"- {value}")
else:
lines.append("- 未定义")
lines.append("")
lines.extend(
[
"## 实验层",
"",
"- 仅用于自有资产、测试环境或已明确授权目标。",
@@ -454,6 +533,7 @@ def render_registry(
selected_system_ids: set[str] | None = None,
) -> None:
run_map = latest_runs_by_advisory()
entity_views = build_entity_views(source_map, advisories)
grouped: Dict[str, List[AdvisoryRecord]] = defaultdict(list)
advisory_payloads: Dict[str, Dict[str, Any]] = {}
for advisory in advisories:
@@ -497,10 +577,17 @@ def render_registry(
_sync_selected_json_dir(REGISTRY_ROOT / "advisories", advisory_payloads, selected_system_ids)
_sync_selected_json_dir(TRIAGE_DIR, triage_payloads, selected_system_ids)
_sync_selected_json_dir(REGISTRY_ROOT / "systems", system_payloads, selected_system_ids, systems_dir=True)
selected_entity_payloads = {
filename: payload
for filename, payload in entity_views["entity_payloads"].items()
if payload.get("root_system_id") in selected_system_ids
}
_sync_selected_entity_json_dir(ENTITIES_DIR, selected_entity_payloads, selected_system_ids)
return
_sync_json_dir(REGISTRY_ROOT / "advisories", advisory_payloads)
_sync_json_dir(TRIAGE_DIR, triage_payloads)
_sync_json_dir(REGISTRY_ROOT / "systems", system_payloads)
_sync_json_dir(ENTITIES_DIR, entity_views["entity_payloads"])
def render_generated(
@@ -513,6 +600,7 @@ def render_generated(
ensure_dir(GENERATED_DIR)
systems = {item["system_id"]: item for item in source_map["systems"]}
run_map = latest_runs_by_advisory()
entity_views = build_entity_views(source_map, advisories)
change_summary = change_summary or {}
triage_by_system: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
for item in triage:
@@ -549,6 +637,8 @@ def render_generated(
f"- 渲染时间: `{isoformat(now_utc())}`",
f"- 系统数量: `{len(source_map['systems'])}`",
f"- Advisory 数量: `{len(advisories)}`",
f"- 已编目实体数量: `{entity_views['completeness']['cataloged_entity_total']}`",
f"- 待编目 backlog 数量: `{entity_views['completeness']['candidate_entity_total']}`",
f"- 重点 Markdown 数量: `{markdown_total}`",
f"- Run Bundle 数量: `{len(run_map)}`",
f"- 新增记录: `{change_summary.get('new_count', 0)}`",
@@ -568,6 +658,8 @@ def render_generated(
"generated_at": isoformat(now_utc()),
"system_count": len(source_map["systems"]),
"advisory_count": len(advisories),
"cataloged_entity_total": entity_views["completeness"]["cataloged_entity_total"],
"candidate_entity_total": entity_views["completeness"]["candidate_entity_total"],
"markdown_count": markdown_total,
"new_count": change_summary.get("new_count", 0),
"updated_count": change_summary.get("updated_count", 0),
@@ -577,6 +669,11 @@ def render_generated(
"failures": failures,
},
)
write_json(ENTITY_COMPLETENESS_PATH, entity_views["completeness"])
write_json(ENTITY_BACKLOG_PATH, entity_views["candidate_backlog"])
write_json(ENTITY_QUEUES_PATH, entity_views["queues"])
write_text(ENTITY_CATALOG_REPORT_MD_PATH, entity_views["catalog_report_markdown"])
write_text(ENTITY_BACKLOG_REPORT_MD_PATH, entity_views["backlog_report_markdown"])
render_lab_dashboard(
advisory_records=[item.to_dict() for item in advisories],
source_map_data=source_map,

查看文件

@@ -3,6 +3,7 @@ from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List
from intel.entities import enrich_advisory_record
from intel.models import AdvisoryRecord
from intel.utils import slugify
@@ -82,6 +83,7 @@ def route_advisories(source_map: Dict[str, Any], advisories: List[AdvisoryRecord
routed: List[AdvisoryRecord] = []
for advisory in advisories:
system = systems[advisory.system_id]
enrich_advisory_record(advisory, system)
advisory.secure_code_topics = _pick_topics(system, advisory)
advisory.render_markdown = _should_render(system, advisory)
if advisory.render_markdown:

查看文件

@@ -3,7 +3,7 @@ from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List
from intel.config import FRAMEWORK_ROOT, GENERATED_DIR, REGISTRY_ROOT, REPRO_MAP_PATH, ROOT, SECURE_CODE_ROOT, SOURCE_MAP_PATH, SYSTEMS_DIR
from intel.config import ENTITIES_DIR, FRAMEWORK_ROOT, GENERATED_DIR, REGISTRY_ROOT, REPRO_MAP_PATH, ROOT, SECURE_CODE_ROOT, SOURCE_MAP_PATH, SYSTEMS_DIR
from intel.render import LANGUAGES, TOPIC_DESCRIPTIONS
from intel.utils import load_all_json
@@ -18,6 +18,24 @@ REQUIRED_REGISTRY_FIELDS = {
"verification_status",
"verification_mode",
"repro_profile_id",
"entity_refs",
"affected_components",
"affected_version_ranges",
"fixed_version_ranges",
"version_confidence",
"workflow",
}
REQUIRED_ENTITY_FIELDS = {
"entity_id",
"entity_type",
"display_name",
"root_system_id",
"category",
"status",
"history_policy",
"latest_sync_status",
"official_source_covered",
}
REQUIRED_SYSTEM_FIELDS = {
@@ -101,6 +119,19 @@ def validate(source_map: Dict[str, Any]) -> List[str]:
missing = REQUIRED_REGISTRY_FIELDS - set(item.keys())
if missing:
errors.append(f"registry advisory missing fields: {item.get('canonical_id', 'unknown')} -> {sorted(missing)}")
workflow = item.get("workflow") or {}
if not workflow.get("workflow_id"):
errors.append(f"registry advisory workflow missing workflow_id: {item.get('canonical_id', 'unknown')}")
if not workflow.get("vuln_family"):
errors.append(f"registry advisory workflow missing vuln_family: {item.get('canonical_id', 'unknown')}")
entity_items = load_all_json(ENTITIES_DIR)
if not entity_items:
errors.append(f"entity registry missing: {ENTITIES_DIR}")
for item in entity_items:
missing = REQUIRED_ENTITY_FIELDS - set(item.keys())
if missing:
errors.append(f"entity registry missing fields: {item.get('entity_id', 'unknown')} -> {sorted(missing)}")
for path in [
GENERATED_DIR / "coverage-matrix.md",
@@ -112,6 +143,11 @@ def validate(source_map: Dict[str, Any]) -> List[str]:
GENERATED_DIR / "source-catalog-audit.json",
GENERATED_DIR / "source-catalog-audit.md",
GENERATED_DIR / "retired-sources.json",
GENERATED_DIR / "entity-completeness.json",
GENERATED_DIR / "entity-discovery-backlog.json",
GENERATED_DIR / "entity-queues.json",
GENERATED_DIR / "entity-catalog-report.md",
GENERATED_DIR / "entity-discovery-backlog.md",
GENERATED_DIR / "dashboard" / "index.html",
GENERATED_DIR / "dashboard" / "overview" / "index.html",
GENERATED_DIR / "dashboard" / "runs" / "index.html",
@@ -140,6 +176,8 @@ def validate(source_map: Dict[str, Any]) -> List[str]:
GENERATED_DIR / "dashboard" / "docs" / "source-map.html",
GENERATED_DIR / "dashboard" / "docs" / "source-catalog-audit.html",
GENERATED_DIR / "dashboard" / "docs" / "retired-sources.html",
GENERATED_DIR / "dashboard" / "docs" / "entity-catalog-report.html",
GENERATED_DIR / "dashboard" / "docs" / "entity-discovery-backlog.html",
GENERATED_DIR / "dashboard" / "docs" / "repro-map.html",
GENERATED_DIR / "dashboard" / "docs" / "coverage-matrix.html",
GENERATED_DIR / "dashboard" / "docs" / "design-source.html",
@@ -149,6 +187,9 @@ def validate(source_map: Dict[str, Any]) -> List[str]:
GENERATED_DIR / "dashboard" / "data" / "alerts.json",
GENERATED_DIR / "dashboard" / "data" / "monitor-summary.json",
GENERATED_DIR / "dashboard" / "data" / "source-catalog-audit.json",
GENERATED_DIR / "dashboard" / "data" / "entity-completeness.json",
GENERATED_DIR / "dashboard" / "data" / "entity-discovery-backlog.json",
GENERATED_DIR / "dashboard" / "data" / "entity-queues.json",
ROOT / "docs" / "testing-completeness-report.md",
ROOT / "08-threat-intel" / "registry" / "source-confidence.md",
]:

查看文件

@@ -45,6 +45,8 @@ const DOC_HUB_ITEMS = [
{ title: "source-map 镜像", href: "/docs/source-map.html", description: "系统覆盖、来源、输出目录和 secure-code 主题真值。", badge: "source-map" },
{ title: "source catalog audit", href: "/docs/source-catalog-audit.html", description: "active/retired source、replacement map 与覆盖摘要。", badge: "audit" },
{ title: "retired sources", href: "/docs/retired-sources.html", description: "退役源、退役原因和 replacement_sources 真值。", badge: "retired" },
{ title: "entity catalog report", href: "/docs/entity-catalog-report.html", description: "分层实体覆盖、history-full 完整度与 workflow 指标。", badge: "entities" },
{ title: "entity discovery backlog", href: "/docs/entity-discovery-backlog.html", description: "待编目 repo / 插件 / 包 backlog 与等待原因。", badge: "backlog" },
{ title: "repro-map 镜像", href: "/docs/repro-map.html", description: "默认漏洞家族、浏览器要求和日志策略真值。", badge: "repro-map" },
{ title: "覆盖矩阵镜像", href: "/docs/coverage-matrix.html", description: "当前全库覆盖矩阵的本地镜像。", badge: "coverage" },
{ title: "安全编码索引", href: "/docs/secure-code-index.html", description: "secure-code 修复主题索引镜像。", badge: "secure-code" },
@@ -58,6 +60,9 @@ const DATA_HUB_ITEMS = [
{ title: "alerts.json", href: "/data/alerts.json", description: "source 告警状态机、failure streak 与 resolved 记录。", badge: "json" },
{ title: "monitor-summary.json", href: "/data/monitor-summary.json", description: "每日监控摘要、open alerts 与最近全绿时间。", badge: "json" },
{ title: "source-catalog-audit.json", href: "/data/source-catalog-audit.json", description: "source catalog 审计真值与 retired/replacement 关系。", badge: "json" },
{ title: "entity-completeness.json", href: "/data/entity-completeness.json", description: "实体级 catalog 完整度、版本映射和 workflow 覆盖。", badge: "json" },
{ title: "entity-discovery-backlog.json", href: "/data/entity-discovery-backlog.json", description: "发现但尚未正式编目的 repo / 插件 / 包 backlog。", badge: "json" },
{ title: "entity-queues.json", href: "/data/entity-queues.json", description: "discovery/history/latest/workflow 四类队列摘要。", badge: "json" },
{ title: "runs.json", href: "/runs.json", description: "最近运行的结构化详情,可用于 UI 和调试。", badge: "json" },
{ title: "systems.json", href: "/systems.json", description: "系统级覆盖、分类、更新时间和浏览器证据统计。", badge: "json" },
{ title: "advisories.json", href: "/advisories.json", description: "漏洞条目元数据、来源和 secure-code 主题。", badge: "json" },
@@ -93,6 +98,7 @@ const state = {
profiles: {},
architecture: null,
completeness: null,
entityCompleteness: null,
sourceHealth: null,
alerts: [],
monitorSummary: null,
@@ -288,9 +294,12 @@ function familyOptions() {
function metricCards() {
const completeness = state.completeness || state.summary?.completeness || {};
const entityCoverage = state.entityCompleteness || state.summary?.entity_coverage || completeness.entity_coverage || {};
const monitoring = state.monitorSummary || state.summary?.monitoring || {};
const advisoryTotal = Number(completeness.advisory_total || state.summary?.advisory_count || 0);
const advisorySuccess = Number(completeness.verified_real || 0);
const catalogedEntities = Number(entityCoverage.cataloged_entity_total || 0);
const candidateEntities = Number(entityCoverage.candidate_entity_total || 0);
const activeSources = Number(monitoring.active_source_count || state.sourceHealth?.active_source_count || 0);
const greenSources = Number(monitoring.green_source_count || state.sourceHealth?.green_source_count || 0);
const openAlerts = Number(monitoring.open_alert_count || state.sourceHealth?.open_alert_count || 0);
@@ -304,6 +313,13 @@ function metricCards() {
color: "var(--accent-green)",
iconName: "report"
},
{
label: "分层实体",
value: catalogedEntities,
note: `backlog ${candidateEntities}`,
color: "var(--accent-yellow)",
iconName: "systems"
},
{
label: "active sources",
value: activeSources,
@@ -774,6 +790,7 @@ function renderPanel(panelKey, title, meta, iconName, content) {
function renderCompletenessPanel(panelKey, compact = false) {
const completeness = state.completeness || state.summary?.completeness || {};
const entityCoverage = state.entityCompleteness || state.summary?.entity_coverage || completeness.entity_coverage || {};
const sourceHealth = state.sourceHealth || completeness.source_health || {};
const systems = (state.completeness?.systems || []).map((system) => `
<article class="plan-card">
@@ -816,12 +833,30 @@ function renderCompletenessPanel(panelKey, compact = false) {
<strong>open alerts</strong>
<span>${escapeHtml(sourceHealth.open_alert_count || 0)}</span>
</article>
<article class="detail-stat">
<strong>cataloged entities</strong>
<span>${escapeHtml(entityCoverage.cataloged_entity_total || 0)}</span>
</article>
<article class="detail-stat">
<strong>candidate backlog</strong>
<span>${escapeHtml(entityCoverage.candidate_entity_total || 0)}</span>
</article>
<article class="detail-stat">
<strong>workflow complete</strong>
<span>${escapeHtml(entityCoverage.workflow_complete_count || 0)}</span>
</article>
<article class="detail-stat">
<strong>version mapped</strong>
<span>${escapeHtml(entityCoverage.version_mapped_count || 0)}</span>
</article>
</div>
<div class="plan-grid" style="margin-top:16px;">${systems || `<div class="empty-state">暂无系统完整度数据。</div>`}</div>
${compact ? "" : `
<div class="detail-actions" style="margin-top:16px;">
<a class="button button-secondary" href="/docs/testing-completeness-report.html" target="_blank" rel="noreferrer">${icon("docs")}<span>打开中文报告</span></a>
<a class="button button-secondary" href="/docs/entity-catalog-report.html" target="_blank" rel="noreferrer">${icon("docs")}<span>打开实体报告</span></a>
<a class="button button-secondary" href="/data/completeness.json" target="_blank" rel="noreferrer">${icon("json")}<span>打开 completeness.json</span></a>
<a class="button button-secondary" href="/data/entity-completeness.json" target="_blank" rel="noreferrer">${icon("json")}<span>打开 entity-completeness.json</span></a>
<a class="button button-secondary" href="/data/source-health.json" target="_blank" rel="noreferrer">${icon("json")}<span>打开 source-health.json</span></a>
</div>
${failures.length ? `<div class="callout" style="margin-top:16px;"><strong>Ingest 未清零</strong><div class="plan-copy">${escapeHtml(failures.join(" | "))}</div></div>` : ""}
@@ -1114,7 +1149,8 @@ function renderRunWorkspace() {
{ label: "概要", copy: advisory.summary || "当前漏洞条目没有摘要。" },
{ label: "成功判据", copy: (profile.success_criteria || []).join(" | ") || "当前复现档案没有定义成功判据。" },
{ label: "Seed / 攻击思路", copy: (run.reasoning_lines || []).join("\n\n") || "当前运行没有记录思路说明。" },
{ label: "允许目标", copy: (profile.allowed_target_types || []).join(", ") || "当前复现档案没有声明允许目标类型。" }
{ label: "允许目标", copy: (profile.allowed_target_types || []).join(", ") || "当前复现档案没有声明允许目标类型。" },
{ label: "版本与对象", copy: `${advisory.advisory_scope || "core"} · ${advisory.version_confidence || "unknown"} · ${(advisory.affected_version_ranges || []).join(", ") || "版本待补齐"}` }
];
const evidenceContent = `
@@ -1239,6 +1275,21 @@ function renderRunWorkspace() {
<div class="tag-row">
${(advisory.aliases || []).map((alias) => `<span class="tag">${escapeHtml(alias)}</span>`).join("")}
${(advisory.secure_code_topics || []).map((topic) => `<a class="tag" href="/docs/secure-code-index.html" target="_blank" rel="noreferrer">${escapeHtml(topic)}</a>`).join("")}
${(advisory.entity_refs || []).map((item) => `<span class="tag">${escapeHtml(item.entity_type || "entity")} · ${escapeHtml(item.entity_id || "-")}</span>`).join("")}
</div>
<div class="plan-grid" style="margin-bottom:16px;">
<article class="plan-card">
<span class="plan-label">影响对象</span>
<div class="plan-copy">${escapeHtml((advisory.affected_components || []).map((item) => item.name).join(" | ") || "未定义")}</div>
</article>
<article class="plan-card">
<span class="plan-label">版本映射</span>
<div class="plan-copy">${escapeHtml((advisory.affected_version_ranges || []).join(" | ") || advisory.version_gap_reason || "未定义")}</div>
</article>
<article class="plan-card">
<span class="plan-label">Workflow</span>
<div class="plan-copy">${escapeHtml(advisory.workflow?.vuln_family || "unknown")} · ${escapeHtml(advisory.workflow?.entry_surface || "-")}</div>
</article>
</div>
<div class="source-links">${sourceLinks}</div>
`)}
@@ -1558,7 +1609,7 @@ async function loadData(preserveSelection = true) {
renderSyncState("loading", "刷新中", `本地时间 ${new Date().toLocaleTimeString("zh-CN", { hour12: false })}`);
try {
const [summary, runs, systems, advisories, profiles, architecture, completeness, sourceHealth, alerts, monitorSummary] = await Promise.all([
const [summary, runs, systems, advisories, profiles, architecture, completeness, entityCompleteness, sourceHealth, alerts, monitorSummary] = await Promise.all([
fetchJson("/summary.json"),
fetchJson("/runs.json"),
fetchJson("/systems.json"),
@@ -1566,6 +1617,7 @@ async function loadData(preserveSelection = true) {
fetchJson("/profiles.json"),
fetchJson("/architecture.json"),
fetchJson("/data/completeness.json"),
fetchJson("/data/entity-completeness.json"),
fetchJson("/data/source-health.json"),
fetchJson("/data/alerts.json"),
fetchJson("/data/monitor-summary.json")
@@ -1578,6 +1630,7 @@ async function loadData(preserveSelection = true) {
state.profiles = profiles;
state.architecture = architecture;
state.completeness = completeness;
state.entityCompleteness = entityCompleteness;
state.sourceHealth = sourceHealth;
state.alerts = alerts;
state.monitorSummary = monitorSummary;

查看文件

@@ -154,6 +154,17 @@ def _advisory_meta(advisory: Dict[str, Any]) -> Dict[str, Any]:
"artifact_mode": advisory.get("artifact_mode"),
"blocked_reason": advisory.get("blocked_reason"),
"browser_evidence": advisory.get("browser_evidence", {}),
"entity_refs": advisory.get("entity_refs", []),
"affected_components": advisory.get("affected_components", []),
"affected_version_ranges": advisory.get("affected_version_ranges", []),
"fixed_version_ranges": advisory.get("fixed_version_ranges", []),
"introduced_version": advisory.get("introduced_version"),
"patched_version": advisory.get("patched_version"),
"version_confidence": advisory.get("version_confidence"),
"version_gap_reason": advisory.get("version_gap_reason"),
"version_resolution_needed": advisory.get("version_resolution_needed"),
"advisory_scope": advisory.get("advisory_scope"),
"workflow": advisory.get("workflow", {}),
}
@@ -277,6 +288,37 @@ def _synthetic_advisory_from_run(run: Dict[str, Any], source_system_map: Dict[st
"updated_at": run.get("finished_at") or run.get("started_at"),
"official_source_url": "",
"secondary_source_urls": [],
"entity_refs": [
{
"entity_id": system_id,
"entity_type": "system",
"relation": "root-system",
"root_system_id": system_id,
"official": True,
}
],
"affected_components": [
{
"name": system_meta.get("display_name", system_id),
"entity_id": system_id,
"scope": "core",
"package_name": None,
"official": True,
}
],
"affected_version_ranges": [],
"fixed_version_ranges": [],
"introduced_version": None,
"patched_version": None,
"version_confidence": "low",
"version_gap_reason": "derived from stored run without a persisted advisory record",
"version_resolution_needed": True,
"advisory_scope": "core",
"workflow": {
"workflow_id": f"{run.get('advisory_id')}--derived-workflow",
"vuln_family": run.get("repro_profile_id") or "unknown",
"entry_surface": "stored-run-derived",
},
}
@@ -317,6 +359,7 @@ def _build_completeness(
source_health: Dict[str, Any],
alerts: List[Dict[str, Any]],
monitor_summary: Dict[str, Any],
entity_completeness: Dict[str, Any],
) -> Dict[str, Any]:
tracked_advisories = [item for item in advisories if item.get("last_run_id")] or advisories
latest_statuses: Dict[str, int] = {}
@@ -402,6 +445,7 @@ def _build_completeness(
"open_alert_count": open_alert_count,
"resolved_alert_count": len([item for item in alerts if item.get("status") == "resolved"]),
},
"entity_coverage": entity_completeness or {},
"monitor_summary": monitor_summary or {},
"historical_blockers": [
"Docker daemon unavailable caused provision-compose-environment blocked-artifact.",
@@ -409,6 +453,7 @@ def _build_completeness(
"Baseline and browser steps were skipped when environment readiness was not enforced.",
"Latest completeness now uses one advisory -> latest run semantics instead of historical run piles.",
"Source health now counts only status=active sources; retired sources are audited separately with replacement links.",
"Entity coverage now separates cataloged entities from discovery backlog so infinite internet scope no longer pollutes completion numbers.",
],
}
@@ -426,6 +471,8 @@ def _write_testing_completeness_report(completeness: Dict[str, Any]) -> None:
f"- active source 全绿: `{completeness['source_health']['green_source_count']}/{completeness['source_health']['active_source_count']}`",
f"- source open alerts: `{completeness['source_health']['open_alert_count']}`",
f"- 最近一次 source 全绿: `{completeness['source_health'].get('last_fully_green_run') or '-'}`",
f"- 已编目实体: `{completeness.get('entity_coverage', {}).get('cataloged_entity_total', 0)}`",
f"- 待编目 backlog: `{completeness.get('entity_coverage', {}).get('candidate_entity_total', 0)}`",
"",
"## 系统覆盖矩阵",
"",
@@ -441,6 +488,15 @@ def _write_testing_completeness_report(completeness: Dict[str, Any]) -> None:
)
lines.extend(
[
"",
"## 分层实体完整度",
"",
f"- history-full 已完成: `{completeness.get('entity_coverage', {}).get('history_full_complete_count', 0)}`",
f"- latest green: `{completeness.get('entity_coverage', {}).get('latest_green_count', 0)}`",
f"- workflow 完整: `{completeness.get('entity_coverage', {}).get('workflow_complete_count', 0)}`",
f"- 版本映射完整: `{completeness.get('entity_coverage', {}).get('version_mapped_count', 0)}`",
f"- 官方源覆盖: `{completeness.get('entity_coverage', {}).get('official_source_covered_count', 0)}`",
f"- 插件 history-full 已完成: `{completeness.get('entity_coverage', {}).get('plugin_history_full_count', 0)}`",
"",
"## 历史阻塞项修复纪要",
"",
@@ -495,6 +551,8 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
_link("source-map 真值", "/docs/source-map.html", "系统覆盖、来源和输出目录真值。"),
_link("source catalog audit", "/docs/source-catalog-audit.html", "active/retired source 审计、替代关系与覆盖摘要。"),
_link("retired sources", "/docs/retired-sources.html", "退役源、退役原因与 replacement map。"),
_link("entity catalog report", "/docs/entity-catalog-report.html", "分层实体覆盖、history-full 完整度与 workflow 指标。"),
_link("entity discovery backlog", "/docs/entity-discovery-backlog.html", "待编目 repo / 插件 / 包 backlog 与等待原因。"),
_link("repro-map 真值", "/docs/repro-map.html", "复现族路由、浏览器要求和日志策略。"),
_link("覆盖矩阵", "/docs/coverage-matrix.html", "自动生成覆盖摘要的本地镜像。"),
_link("设计来源清单", "/docs/design-source.html", "Lovart 模板本地 vendor manifest。"),
@@ -507,6 +565,9 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
_link("source-health.json", "/data/source-health.json", "active source 健康度、系统分布与失败分类。"),
_link("alerts.json", "/data/alerts.json", "source 告警状态机、failure streak 与 resolved 记录。"),
_link("monitor-summary.json", "/data/monitor-summary.json", "每日监控摘要、open alerts 与最近全绿时间。"),
_link("entity-completeness.json", "/data/entity-completeness.json", "实体级 catalog 完整度、版本映射与 workflow 覆盖。"),
_link("entity-discovery-backlog.json", "/data/entity-discovery-backlog.json", "发现但尚未正式编目的 repo / 插件 / 包 backlog。"),
_link("entity-queues.json", "/data/entity-queues.json", "discovery/history/latest/workflow 四类队列摘要。"),
_link("runs.json", "/runs.json", "最近 run 的结构化详情。"),
_link("systems.json", "/systems.json", "系统级覆盖与浏览器证据摘要。"),
_link("advisories.json", "/advisories.json", "漏洞条目元数据与来源。"),
@@ -650,6 +711,8 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
_stat("纳管系统", len(source_systems)),
_stat("历史全量系统", tier_counts.get("history-full", 0)),
_stat("近两年全量系统", tier_counts.get("rolling-24m", 0)),
_stat("已编目实体", (summary.get("entity_coverage") or {}).get("cataloged_entity_total", 0)),
_stat("发现 backlog", (summary.get("entity_coverage") or {}).get("candidate_entity_total", 0)),
_stat("当前运行", summary.get("run_count", 0)),
_stat("当前漏洞条目", summary.get("advisory_count", 0)),
],
@@ -756,6 +819,7 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
"fields": [
_field("漏洞条目 Registry", "08-threat-intel/registry/advisories/*.json"),
_field("系统 Registry", "08-threat-intel/registry/systems/*.json"),
_field("实体 Registry", "08-threat-intel/registry/entities/*.json"),
_field("运行 Registry", "08-threat-intel/registry/runs/*.json"),
_field("source-map 真值", "08-threat-intel/source-map.yaml"),
_field("repro-map 真值", "08-threat-intel/repro-map.yaml"),
@@ -769,6 +833,9 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
"fields": [
_field("工作台根目录", "08-threat-intel/generated/dashboard/"),
_field("运行归档根目录", "06-case-studies/generated-runs/<run-id>/"),
_field("实体完整度", "/data/entity-completeness.json"),
_field("发现 backlog", "/data/entity-discovery-backlog.json"),
_field("实体队列", "/data/entity-queues.json"),
_field("默认入口", "/index.html"),
_field("总览入口", "/overview/index.html"),
_field("运行入口", "/runs/index.html"),
@@ -1013,6 +1080,18 @@ def _write_dashboard_docs(architecture: Dict[str, Any]) -> None:
REPRO_MAP_PATH.read_text(encoding="utf-8"),
"工作台内置镜像页:默认漏洞家族、浏览器要求和日志策略真值。",
),
(
"entity-catalog-report.html",
"分层实体覆盖报告",
_safe_read_text(ROOT / "08-threat-intel" / "generated" / "entity-catalog-report.md", "entity catalog report has not been generated yet."),
"工作台内置镜像页分层实体覆盖、history-full 完整度和 workflow 统计。",
),
(
"entity-discovery-backlog.html",
"分层实体发现 Backlog",
_safe_read_text(ROOT / "08-threat-intel" / "generated" / "entity-discovery-backlog.md", "entity discovery backlog has not been generated yet."),
"工作台内置镜像页:待编目 repo / 插件 / 包 backlog 与等待原因。",
),
(
"coverage-matrix.html",
"覆盖矩阵镜像",
@@ -1281,6 +1360,9 @@ def render_dashboard(
alerts = read_json(ROOT / "08-threat-intel" / "generated" / "alerts.json", default=[]) or []
monitor_summary = read_json(ROOT / "08-threat-intel" / "generated" / "monitor-summary.json", default={}) or {}
source_catalog_audit = read_json(ROOT / "08-threat-intel" / "generated" / "source-catalog-audit.json", default={}) or {}
entity_completeness = read_json(ROOT / "08-threat-intel" / "generated" / "entity-completeness.json", default={}) or {}
entity_backlog = read_json(ROOT / "08-threat-intel" / "generated" / "entity-discovery-backlog.json", default=[]) or []
entity_queues = read_json(ROOT / "08-threat-intel" / "generated" / "entity-queues.json", default={}) or {}
source_map = source_map_data if source_map_data is not None else (read_yaml(SOURCE_MAP_PATH, default={}) or {})
repro_map = repro_map_data if repro_map_data is not None else (read_yaml(REPRO_MAP_PATH, default={}) or {})
source_system_map = {item["system_id"]: item for item in source_map.get("systems", []) if item.get("system_id")}
@@ -1400,6 +1482,7 @@ def render_dashboard(
"open_alert_count": len([item for item in alerts if item.get("status") == "open"]),
"last_fully_green_run": source_health.get("last_fully_green_run"),
},
"entity_coverage": entity_completeness,
}
for item in merged_advisories:
status = item.get("verification_status", "triage-manual")
@@ -1428,7 +1511,7 @@ def render_dashboard(
for item in sorted(merged_advisories, key=lambda value: value.get("updated_at") or value.get("published_at") or "", reverse=True)
if item.get("verification_status") in {"triage-manual", "blocked-artifact", "blocked-destructive"}
][:20]
completeness = _build_completeness(merged_advisories, runs, profile_map, run_summary, source_health, alerts, monitor_summary)
completeness = _build_completeness(merged_advisories, runs, profile_map, run_summary, source_health, alerts, monitor_summary, entity_completeness)
summary["completeness"] = {
"advisory_total": completeness["advisory_total"],
"verified_real": completeness["verified_real"],
@@ -1440,6 +1523,10 @@ def render_dashboard(
"source_failure_count": completeness["source_health"]["failure_count"],
"active_source_count": completeness["source_health"]["active_source_count"],
"open_alert_count": completeness["source_health"]["open_alert_count"],
"cataloged_entity_total": entity_completeness.get("cataloged_entity_total", 0),
"candidate_entity_total": entity_completeness.get("candidate_entity_total", 0),
"workflow_complete_count": entity_completeness.get("workflow_complete_count", 0),
"version_mapped_count": entity_completeness.get("version_mapped_count", 0),
}
write_json(DASHBOARD_DIR / "summary.json", summary)
@@ -1452,6 +1539,9 @@ def render_dashboard(
write_json(DASHBOARD_DIR / "data" / "alerts.json", alerts)
write_json(DASHBOARD_DIR / "data" / "monitor-summary.json", monitor_summary)
write_json(DASHBOARD_DIR / "data" / "source-catalog-audit.json", source_catalog_audit)
write_json(DASHBOARD_DIR / "data" / "entity-completeness.json", entity_completeness)
write_json(DASHBOARD_DIR / "data" / "entity-discovery-backlog.json", entity_backlog)
write_json(DASHBOARD_DIR / "data" / "entity-queues.json", entity_queues)
_write_testing_completeness_report(completeness)
architecture = _build_architecture_data(summary, source_map, repro_map)
write_json(DASHBOARD_DIR / "architecture.json", architecture)