实现分层实体漏洞知识库与实体级完整度监控

这个提交包含在:
hao
2026-03-19 17:57:45 -07:00
父节点 49fe46ab89
当前提交 1e81279e32
修改 2712 个文件,包含 434447 行新增和 2774 行删除

查看文件

@@ -154,6 +154,17 @@ def _advisory_meta(advisory: Dict[str, Any]) -> Dict[str, Any]:
"artifact_mode": advisory.get("artifact_mode"),
"blocked_reason": advisory.get("blocked_reason"),
"browser_evidence": advisory.get("browser_evidence", {}),
"entity_refs": advisory.get("entity_refs", []),
"affected_components": advisory.get("affected_components", []),
"affected_version_ranges": advisory.get("affected_version_ranges", []),
"fixed_version_ranges": advisory.get("fixed_version_ranges", []),
"introduced_version": advisory.get("introduced_version"),
"patched_version": advisory.get("patched_version"),
"version_confidence": advisory.get("version_confidence"),
"version_gap_reason": advisory.get("version_gap_reason"),
"version_resolution_needed": advisory.get("version_resolution_needed"),
"advisory_scope": advisory.get("advisory_scope"),
"workflow": advisory.get("workflow", {}),
}
@@ -277,6 +288,37 @@ def _synthetic_advisory_from_run(run: Dict[str, Any], source_system_map: Dict[st
"updated_at": run.get("finished_at") or run.get("started_at"),
"official_source_url": "",
"secondary_source_urls": [],
"entity_refs": [
{
"entity_id": system_id,
"entity_type": "system",
"relation": "root-system",
"root_system_id": system_id,
"official": True,
}
],
"affected_components": [
{
"name": system_meta.get("display_name", system_id),
"entity_id": system_id,
"scope": "core",
"package_name": None,
"official": True,
}
],
"affected_version_ranges": [],
"fixed_version_ranges": [],
"introduced_version": None,
"patched_version": None,
"version_confidence": "low",
"version_gap_reason": "derived from stored run without a persisted advisory record",
"version_resolution_needed": True,
"advisory_scope": "core",
"workflow": {
"workflow_id": f"{run.get('advisory_id')}--derived-workflow",
"vuln_family": run.get("repro_profile_id") or "unknown",
"entry_surface": "stored-run-derived",
},
}
@@ -317,6 +359,7 @@ def _build_completeness(
source_health: Dict[str, Any],
alerts: List[Dict[str, Any]],
monitor_summary: Dict[str, Any],
entity_completeness: Dict[str, Any],
) -> Dict[str, Any]:
tracked_advisories = [item for item in advisories if item.get("last_run_id")] or advisories
latest_statuses: Dict[str, int] = {}
@@ -402,6 +445,7 @@ def _build_completeness(
"open_alert_count": open_alert_count,
"resolved_alert_count": len([item for item in alerts if item.get("status") == "resolved"]),
},
"entity_coverage": entity_completeness or {},
"monitor_summary": monitor_summary or {},
"historical_blockers": [
"Docker daemon unavailable caused provision-compose-environment blocked-artifact.",
@@ -409,6 +453,7 @@ def _build_completeness(
"Baseline and browser steps were skipped when environment readiness was not enforced.",
"Latest completeness now uses one advisory -> latest run semantics instead of historical run piles.",
"Source health now counts only status=active sources; retired sources are audited separately with replacement links.",
"Entity coverage now separates cataloged entities from discovery backlog so infinite internet scope no longer pollutes completion numbers.",
],
}
@@ -426,6 +471,8 @@ def _write_testing_completeness_report(completeness: Dict[str, Any]) -> None:
f"- active source 全绿: `{completeness['source_health']['green_source_count']}/{completeness['source_health']['active_source_count']}`",
f"- source open alerts: `{completeness['source_health']['open_alert_count']}`",
f"- 最近一次 source 全绿: `{completeness['source_health'].get('last_fully_green_run') or '-'}`",
f"- 已编目实体: `{completeness.get('entity_coverage', {}).get('cataloged_entity_total', 0)}`",
f"- 待编目 backlog: `{completeness.get('entity_coverage', {}).get('candidate_entity_total', 0)}`",
"",
"## 系统覆盖矩阵",
"",
@@ -441,6 +488,15 @@ def _write_testing_completeness_report(completeness: Dict[str, Any]) -> None:
)
lines.extend(
[
"",
"## 分层实体完整度",
"",
f"- history-full 已完成: `{completeness.get('entity_coverage', {}).get('history_full_complete_count', 0)}`",
f"- latest green: `{completeness.get('entity_coverage', {}).get('latest_green_count', 0)}`",
f"- workflow 完整: `{completeness.get('entity_coverage', {}).get('workflow_complete_count', 0)}`",
f"- 版本映射完整: `{completeness.get('entity_coverage', {}).get('version_mapped_count', 0)}`",
f"- 官方源覆盖: `{completeness.get('entity_coverage', {}).get('official_source_covered_count', 0)}`",
f"- 插件 history-full 已完成: `{completeness.get('entity_coverage', {}).get('plugin_history_full_count', 0)}`",
"",
"## 历史阻塞项修复纪要",
"",
@@ -495,6 +551,8 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
_link("source-map 真值", "/docs/source-map.html", "系统覆盖、来源和输出目录真值。"),
_link("source catalog audit", "/docs/source-catalog-audit.html", "active/retired source 审计、替代关系与覆盖摘要。"),
_link("retired sources", "/docs/retired-sources.html", "退役源、退役原因与 replacement map。"),
_link("entity catalog report", "/docs/entity-catalog-report.html", "分层实体覆盖、history-full 完整度与 workflow 指标。"),
_link("entity discovery backlog", "/docs/entity-discovery-backlog.html", "待编目 repo / 插件 / 包 backlog 与等待原因。"),
_link("repro-map 真值", "/docs/repro-map.html", "复现族路由、浏览器要求和日志策略。"),
_link("覆盖矩阵", "/docs/coverage-matrix.html", "自动生成覆盖摘要的本地镜像。"),
_link("设计来源清单", "/docs/design-source.html", "Lovart 模板本地 vendor manifest。"),
@@ -507,6 +565,9 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
_link("source-health.json", "/data/source-health.json", "active source 健康度、系统分布与失败分类。"),
_link("alerts.json", "/data/alerts.json", "source 告警状态机、failure streak 与 resolved 记录。"),
_link("monitor-summary.json", "/data/monitor-summary.json", "每日监控摘要、open alerts 与最近全绿时间。"),
_link("entity-completeness.json", "/data/entity-completeness.json", "实体级 catalog 完整度、版本映射与 workflow 覆盖。"),
_link("entity-discovery-backlog.json", "/data/entity-discovery-backlog.json", "发现但尚未正式编目的 repo / 插件 / 包 backlog。"),
_link("entity-queues.json", "/data/entity-queues.json", "discovery/history/latest/workflow 四类队列摘要。"),
_link("runs.json", "/runs.json", "最近 run 的结构化详情。"),
_link("systems.json", "/systems.json", "系统级覆盖与浏览器证据摘要。"),
_link("advisories.json", "/advisories.json", "漏洞条目元数据与来源。"),
@@ -650,6 +711,8 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
_stat("纳管系统", len(source_systems)),
_stat("历史全量系统", tier_counts.get("history-full", 0)),
_stat("近两年全量系统", tier_counts.get("rolling-24m", 0)),
_stat("已编目实体", (summary.get("entity_coverage") or {}).get("cataloged_entity_total", 0)),
_stat("发现 backlog", (summary.get("entity_coverage") or {}).get("candidate_entity_total", 0)),
_stat("当前运行", summary.get("run_count", 0)),
_stat("当前漏洞条目", summary.get("advisory_count", 0)),
],
@@ -756,6 +819,7 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
"fields": [
_field("漏洞条目 Registry", "08-threat-intel/registry/advisories/*.json"),
_field("系统 Registry", "08-threat-intel/registry/systems/*.json"),
_field("实体 Registry", "08-threat-intel/registry/entities/*.json"),
_field("运行 Registry", "08-threat-intel/registry/runs/*.json"),
_field("source-map 真值", "08-threat-intel/source-map.yaml"),
_field("repro-map 真值", "08-threat-intel/repro-map.yaml"),
@@ -769,6 +833,9 @@ def _build_architecture_data(summary: Dict[str, Any], source_map: Dict[str, Any]
"fields": [
_field("工作台根目录", "08-threat-intel/generated/dashboard/"),
_field("运行归档根目录", "06-case-studies/generated-runs/<run-id>/"),
_field("实体完整度", "/data/entity-completeness.json"),
_field("发现 backlog", "/data/entity-discovery-backlog.json"),
_field("实体队列", "/data/entity-queues.json"),
_field("默认入口", "/index.html"),
_field("总览入口", "/overview/index.html"),
_field("运行入口", "/runs/index.html"),
@@ -1013,6 +1080,18 @@ def _write_dashboard_docs(architecture: Dict[str, Any]) -> None:
REPRO_MAP_PATH.read_text(encoding="utf-8"),
"工作台内置镜像页:默认漏洞家族、浏览器要求和日志策略真值。",
),
(
"entity-catalog-report.html",
"分层实体覆盖报告",
_safe_read_text(ROOT / "08-threat-intel" / "generated" / "entity-catalog-report.md", "entity catalog report has not been generated yet."),
"工作台内置镜像页分层实体覆盖、history-full 完整度和 workflow 统计。",
),
(
"entity-discovery-backlog.html",
"分层实体发现 Backlog",
_safe_read_text(ROOT / "08-threat-intel" / "generated" / "entity-discovery-backlog.md", "entity discovery backlog has not been generated yet."),
"工作台内置镜像页:待编目 repo / 插件 / 包 backlog 与等待原因。",
),
(
"coverage-matrix.html",
"覆盖矩阵镜像",
@@ -1281,6 +1360,9 @@ def render_dashboard(
alerts = read_json(ROOT / "08-threat-intel" / "generated" / "alerts.json", default=[]) or []
monitor_summary = read_json(ROOT / "08-threat-intel" / "generated" / "monitor-summary.json", default={}) or {}
source_catalog_audit = read_json(ROOT / "08-threat-intel" / "generated" / "source-catalog-audit.json", default={}) or {}
entity_completeness = read_json(ROOT / "08-threat-intel" / "generated" / "entity-completeness.json", default={}) or {}
entity_backlog = read_json(ROOT / "08-threat-intel" / "generated" / "entity-discovery-backlog.json", default=[]) or []
entity_queues = read_json(ROOT / "08-threat-intel" / "generated" / "entity-queues.json", default={}) or {}
source_map = source_map_data if source_map_data is not None else (read_yaml(SOURCE_MAP_PATH, default={}) or {})
repro_map = repro_map_data if repro_map_data is not None else (read_yaml(REPRO_MAP_PATH, default={}) or {})
source_system_map = {item["system_id"]: item for item in source_map.get("systems", []) if item.get("system_id")}
@@ -1400,6 +1482,7 @@ def render_dashboard(
"open_alert_count": len([item for item in alerts if item.get("status") == "open"]),
"last_fully_green_run": source_health.get("last_fully_green_run"),
},
"entity_coverage": entity_completeness,
}
for item in merged_advisories:
status = item.get("verification_status", "triage-manual")
@@ -1428,7 +1511,7 @@ def render_dashboard(
for item in sorted(merged_advisories, key=lambda value: value.get("updated_at") or value.get("published_at") or "", reverse=True)
if item.get("verification_status") in {"triage-manual", "blocked-artifact", "blocked-destructive"}
][:20]
completeness = _build_completeness(merged_advisories, runs, profile_map, run_summary, source_health, alerts, monitor_summary)
completeness = _build_completeness(merged_advisories, runs, profile_map, run_summary, source_health, alerts, monitor_summary, entity_completeness)
summary["completeness"] = {
"advisory_total": completeness["advisory_total"],
"verified_real": completeness["verified_real"],
@@ -1440,6 +1523,10 @@ def render_dashboard(
"source_failure_count": completeness["source_health"]["failure_count"],
"active_source_count": completeness["source_health"]["active_source_count"],
"open_alert_count": completeness["source_health"]["open_alert_count"],
"cataloged_entity_total": entity_completeness.get("cataloged_entity_total", 0),
"candidate_entity_total": entity_completeness.get("candidate_entity_total", 0),
"workflow_complete_count": entity_completeness.get("workflow_complete_count", 0),
"version_mapped_count": entity_completeness.get("version_mapped_count", 0),
}
write_json(DASHBOARD_DIR / "summary.json", summary)
@@ -1452,6 +1539,9 @@ def render_dashboard(
write_json(DASHBOARD_DIR / "data" / "alerts.json", alerts)
write_json(DASHBOARD_DIR / "data" / "monitor-summary.json", monitor_summary)
write_json(DASHBOARD_DIR / "data" / "source-catalog-audit.json", source_catalog_audit)
write_json(DASHBOARD_DIR / "data" / "entity-completeness.json", entity_completeness)
write_json(DASHBOARD_DIR / "data" / "entity-discovery-backlog.json", entity_backlog)
write_json(DASHBOARD_DIR / "data" / "entity-queues.json", entity_queues)
_write_testing_completeness_report(completeness)
architecture = _build_architecture_data(summary, source_map, repro_map)
write_json(DASHBOARD_DIR / "architecture.json", architecture)