kb: expand authorized lab coverage and intel automation

这个提交包含在:
hao
2026-03-16 22:04:51 -07:00
父节点 cda31e86c7
当前提交 d0120fbf10
修改 592 个文件,包含 29025 行新增和 267 行删除

290
scripts/intel/main.py 普通文件
查看文件

@@ -0,0 +1,290 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple
# Absolute directory that contains this script.
CURRENT_DIR = Path(__file__).resolve().parent
# Parent of that directory; presumably where the ``intel`` package imported
# below lives — TODO confirm against the repository layout.
SCRIPTS_DIR = CURRENT_DIR.parent
# Put SCRIPTS_DIR at the front of the module search path (only if it is not
# already listed) so the ``intel.*`` imports that follow can be resolved when
# this file is executed directly.
if str(SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPTS_DIR))
from intel.config import ADVISORIES_DIR, GENERATED_DIR, STATE_DIR, STATE_PATH, TRIAGE_DIR, load_source_map # noqa: E402
from intel.models import AdvisoryRecord # noqa: E402
from intel.normalize import normalize_candidates # noqa: E402
from intel.pr import open_pr # noqa: E402
from intel.render import render_case_pages, render_generated, render_registry, render_secure_code, render_system_scaffolding # noqa: E402
from intel.route import route_advisories # noqa: E402
from intel.sources.runner import collect_candidates # noqa: E402
from intel.utils import isoformat, load_all_json, now_utc, parse_since, read_json, write_json # noqa: E402
from intel.validators import validate # noqa: E402
def _load_existing_advisories() -> List[AdvisoryRecord]:
    """Load every stored advisory document as an ``AdvisoryRecord``.

    Items are read from ``ADVISORIES_DIR`` via ``load_all_json`` and passed to
    the ``AdvisoryRecord`` constructor as keyword arguments. An item that makes
    the constructor raise ``TypeError`` (for example because its keys do not
    match the constructor's parameters, or because it is not a mapping) is
    skipped so that one bad file does not abort the whole run. Each skip is
    reported on stderr so that dropped records are visible to the operator.

    Returns:
        The successfully constructed records, in load order.
    """
    advisories: List[AdvisoryRecord] = []
    for item in load_all_json(ADVISORIES_DIR):
        try:
            record = AdvisoryRecord(**item)
        except TypeError as exc:
            # Best-effort load: keep going, but say which record was dropped.
            if isinstance(item, dict):
                label = item.get("canonical_id", "<unknown>")
            else:
                label = "<non-object>"
            print(
                f"warning: skipping malformed advisory {label}: {exc}",
                file=sys.stderr,
            )
            continue
        advisories.append(record)
    return advisories
def _load_existing_triage() -> List[Dict[str, Any]]:
    """Return everything ``load_all_json`` reads from ``TRIAGE_DIR``."""
    triage_entries = load_all_json(TRIAGE_DIR)
    return triage_entries
def _filter_source_map(source_map: Dict[str, Any], system_ids: List[str] | None) -> Dict[str, Any]:
if not system_ids:
return source_map
allowed = set(system_ids)
filtered = [system for system in source_map["systems"] if system["system_id"] in allowed]
found = {system["system_id"] for system in filtered}
missing = sorted(allowed - found)
if missing:
raise ValueError(f"Unknown system_id(s): {', '.join(missing)}")
return {**source_map, "systems": filtered}
def _merge_preserved_records(
    selected_source_map: Dict[str, Any],
    advisories: List[AdvisoryRecord],
    triage: List[Dict[str, Any]],
) -> tuple[List[AdvisoryRecord], List[Dict[str, Any]]]:
    """Combine stored records of unselected systems with the given records.

    Stored advisories and triage entries whose ``system_id`` is NOT among the
    systems in ``selected_source_map`` are loaded and placed first; the
    ``advisories`` / ``triage`` passed in are appended after them.

    Returns:
        ``(merged_advisories, merged_triage)`` as new lists; the inputs are
        not modified.
    """
    in_scope = {entry["system_id"] for entry in selected_source_map["systems"]}

    merged_advisories = [
        record for record in _load_existing_advisories() if record.system_id not in in_scope
    ]
    merged_triage = [
        entry for entry in _load_existing_triage() if entry.get("system_id") not in in_scope
    ]

    merged_advisories += advisories
    merged_triage += triage
    return merged_advisories, merged_triage
def _summarize_changes(advisories: List[AdvisoryRecord]) -> Dict[str, Any]:
new_count = 0
updated_count = 0
touched = set()
for advisory in advisories:
path = ADVISORIES_DIR / f"{advisory.canonical_id}.json"
existing = read_json(path, default=None)
current = advisory.to_dict()
if existing is None:
new_count += 1
touched.add(advisory.system_id)
continue
if existing != current:
updated_count += 1
touched.add(advisory.system_id)
return {
"new_count": new_count,
"updated_count": updated_count,
"systems_touched": sorted(touched),
}
def _select_hotlane(
advisories: List[AdvisoryRecord],
triage: List[Dict[str, Any]],
) -> Tuple[List[AdvisoryRecord], List[Dict[str, Any]]]:
filtered = []
keep_ids = set()
for advisory in advisories:
if advisory.exploit_status in {"known_exploited", "active_exploitation", "in_the_wild"}:
filtered.append(advisory)
keep_ids.add(advisory.canonical_id)
continue
if advisory.cvss_score is not None and advisory.cvss_score >= 8.8:
filtered.append(advisory)
keep_ids.add(advisory.canonical_id)
continue
if advisory.severity == "critical":
filtered.append(advisory)
keep_ids.add(advisory.canonical_id)
filtered_triage = [item for item in triage if item.get("canonical_id") in keep_ids]
return filtered, filtered_triage
def _write_outputs(
    source_map: Dict[str, Any],
    advisories: List[AdvisoryRecord],
    triage: List[Dict[str, Any]],
    failures: List[str],
    change_summary: Dict[str, Any],
) -> None:
    """Invoke every renderer, in a fixed order, with the data it takes.

    The order is: registry, system scaffolding, case pages, secure code, and
    finally the generated output (the only step that receives ``failures``
    and ``change_summary``).
    """
    render_steps = (
        (render_registry, (source_map, advisories, triage)),
        (render_system_scaffolding, (source_map, advisories)),
        (render_case_pages, (advisories,)),
        (render_secure_code, (source_map,)),
        (render_generated, (source_map, advisories, triage, failures, change_summary)),
    )
    for render, render_args in render_steps:
        render(*render_args)
def pipeline(
    full_source_map: Dict[str, Any],
    source_map: Dict[str, Any],
    since_arg: str,
    tier: str | None,
    include_undated: bool,
    hotlane_only: bool = False,
) -> tuple[list[AdvisoryRecord], list[Dict[str, Any]], list[str], Dict[str, Any]]:
    """Collect, normalize, route and render advisories for ``source_map``.

    Args:
        full_source_map: The unfiltered source map.
        source_map: The (possibly filtered) map that is actually collected.
        since_arg: Passed to ``parse_since`` (with a 30-day default) to get
            the collection cut-off; ignored when ``tier`` is ``"history-full"``.
        tier: Forwarded to ``collect_candidates``. ``"history-full"`` disables
            the cut-off.
        include_undated: Forwarded to ``collect_candidates``.
        hotlane_only: When true, results are narrowed with ``_select_hotlane``.

    Returns:
        ``(advisories, triage, failures, change_summary)``. When
        ``source_map`` has fewer systems than ``full_source_map``, the first
        two also contain the stored records of the systems left out.
    """
    if tier == "history-full":
        since_dt = None
    else:
        since_dt = parse_since(since_arg, default_days=30)

    candidates, failures = collect_candidates(
        source_map,
        since_dt=since_dt,
        tier=tier,
        include_undated=include_undated,
    )
    advisories, triage = normalize_candidates(candidates)
    advisories = route_advisories(source_map, advisories)
    if hotlane_only:
        advisories, triage = _select_hotlane(advisories, triage)

    # Summarize before merging, so only records produced by this run are counted.
    change_summary = _summarize_changes(advisories)

    # A system-count mismatch is how a filtered run is detected.
    is_partial_run = len(source_map["systems"]) != len(full_source_map["systems"])
    if is_partial_run:
        advisories, triage = _merge_preserved_records(source_map, advisories, triage)
    render_map = full_source_map if is_partial_run else source_map

    _write_outputs(render_map, advisories, triage, failures, change_summary)
    return advisories, triage, failures, change_summary
def cmd_render(args) -> int:
    """Handle ``render``: re-render outputs from already stored records.

    Stored advisories and triage entries are limited to the systems selected
    by ``args.system``. If that is a subset of all systems, the stored records
    of the remaining systems are merged back in and the full source map is
    rendered. The previous ``run-summary.json`` supplies the failures list and
    is itself passed on as the change summary.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    selected_ids = {entry["system_id"] for entry in source_map["systems"]}

    advisories = [
        record for record in _load_existing_advisories() if record.system_id in selected_ids
    ]
    triage = [
        entry for entry in _load_existing_triage() if entry.get("system_id") in selected_ids
    ]

    summary = read_json(GENERATED_DIR / "run-summary.json", default={}) or {}
    failures = summary.get("failures", [])

    is_partial = len(source_map["systems"]) != len(full_source_map["systems"])
    if is_partial:
        advisories, triage = _merge_preserved_records(source_map, advisories, triage)
    render_map = full_source_map if is_partial else source_map

    _write_outputs(render_map, advisories, triage, failures, summary)
    return 0
def cmd_validate(args) -> int:
    """Handle ``validate``: run ``validate`` on the selected source map.

    Returns:
        ``0`` when no errors are reported; ``1`` otherwise, after printing
        each error on its own line.
    """
    selected_map = _filter_source_map(load_source_map(), args.system)
    errors = validate(selected_map)
    if not errors:
        print("Validation passed.")
        return 0

    print("Validation failed:")
    for problem in errors:
        print(f"- {problem}")
    return 1
def _write_state(status: str) -> None:
    """Persist the outcome of a run to ``STATE_PATH``.

    The existing state document (if any) is read, updated and written back,
    creating ``STATE_DIR`` when needed.

    Args:
        status: Outcome label stored under ``"status"``. The ``"last_success"``
            timestamp is refreshed only when this is ``"success"``, so that
            recording any other outcome cannot advance the window start used
            by ``ingest --since last-success``.
    """
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    state = read_json(STATE_PATH, default={}) or {}
    if status == "success":
        state["last_success"] = isoformat(now_utc())
    state["status"] = status
    write_json(STATE_PATH, state)
def cmd_ingest(args) -> int:
    """Handle ``ingest``: fetch advisories since a given point in time.

    When ``args.since`` is the literal ``"last-success"``, the window start is
    the ``last_success`` value from the state file, or ``"30d"`` if that key
    is absent. Any other value is passed through unchanged.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)

    if args.since == "last-success":
        saved_state = read_json(STATE_PATH, default={}) or {}
        since = saved_state.get("last_success", "30d")
    else:
        since = args.since

    advisories, triage, failures, summary = pipeline(
        full_source_map,
        source_map,
        since,
        None,
        include_undated=False,
    )
    # NOTE(review): the run is recorded as a success even when `failures` is
    # non-empty — confirm that is intended.
    _write_state("success")
    report = f"Ingested {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    print(report)
    return 0
def cmd_hotlane(args) -> int:
    """Handle ``hotlane``: sync only hot advisories from a ``"1d"`` window.

    Runs the pipeline with ``hotlane_only=True``, records a successful run and
    prints a one-line report.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    advisories, triage, failures, summary = pipeline(
        full_source_map,
        source_map,
        "1d",
        None,
        include_undated=False,
        hotlane_only=True,
    )
    # NOTE(review): _write_state also refreshes `last_success`, which
    # cmd_ingest uses as its default window start, although this run keeps
    # only hot-lane advisories — confirm that is intended.
    _write_state("success")
    report = f"Hotlane synced {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    print(report)
    return 0
def cmd_reconcile(args) -> int:
    """Handle ``reconcile``: re-run the pipeline over a ``"30d"`` window.

    Records a successful run and prints a one-line report.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    advisories, triage, failures, summary = pipeline(
        full_source_map,
        source_map,
        "30d",
        None,
        include_undated=False,
    )
    _write_state("success")
    report = f"Reconciled {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    print(report)
    return 0
def cmd_backfill(args) -> int:
    """Handle ``backfill``: fetch historical advisories for ``args.tier``.

    With ``--dry-run`` the candidates are collected, normalized, routed and
    (optionally) narrowed to the hot lane, but the pipeline is not run, so
    ``_write_outputs`` is never called; only the counts are printed.
    Otherwise the full pipeline runs with ``include_undated=True``. The state
    file is not updated on either path.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)

    if not args.dry_run:
        advisories, triage, failures, summary = pipeline(
            full_source_map,
            source_map,
            "",
            args.tier,
            include_undated=True,
            hotlane_only=args.hotlane_only,
        )
        report = f"Backfilled {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
        print(report)
        return 0

    # NOTE(review): the dry run always collects with since_dt=None, whereas
    # pipeline() does so only for tier "history-full" and otherwise derives
    # the cut-off from parse_since("", default_days=30) — confirm the dry-run
    # counts are meant to be comparable for "rolling-24m".
    candidates, failures = collect_candidates(
        source_map,
        since_dt=None,
        tier=args.tier,
        include_undated=True,
    )
    advisories, triage = normalize_candidates(candidates)
    advisories = route_advisories(source_map, advisories)
    if args.hotlane_only:
        advisories, triage = _select_hotlane(advisories, triage)
    preview = f"Dry run backfill tier={args.tier}: candidates={len(candidates)} advisories={len(advisories)} triage={len(triage)} failures={len(failures)}"
    print(preview)
    return 0
def cmd_open_pr(args) -> int:
    """Handle ``open-pr``: call ``open_pr`` and print whatever it returns.

    Returns:
        Always ``0``.
    """
    outcome = open_pr(base_branch=args.base, dry_run=args.dry_run)
    print(outcome)
    return 0
def main() -> int:
    """Parse the command line and dispatch to the chosen sub-command.

    Sub-commands: ``backfill``, ``ingest``, ``hotlane``, ``reconcile``,
    ``render``, ``validate`` and ``open-pr``. All but ``open-pr`` accept a
    repeatable ``--system`` option.

    Returns:
        The integer returned by the selected ``cmd_*`` handler.
    """
    parser = argparse.ArgumentParser(description="Websafe threat intel automation")
    commands = parser.add_subparsers(dest="command", required=True)

    def _finish(sub, handler, *, with_system=True):
        # Attach the shared --system option (last, as before) and the handler.
        if with_system:
            sub.add_argument("--system", action="append")
        sub.set_defaults(func=handler)

    backfill = commands.add_parser("backfill", help="Fetch historical advisories")
    backfill.add_argument("--tier", choices=["history-full", "rolling-24m"], required=True)
    backfill.add_argument("--dry-run", action="store_true")
    backfill.add_argument("--hotlane-only", action="store_true")
    _finish(backfill, cmd_backfill)

    ingest = commands.add_parser("ingest", help="Fetch incremental advisories")
    ingest.add_argument("--since", default="last-success")
    _finish(ingest, cmd_ingest)

    hotlane = commands.add_parser("hotlane", help="Fetch only KEV / in-the-wild / critical updates")
    _finish(hotlane, cmd_hotlane)

    reconcile = commands.add_parser("reconcile", help="Reconcile the last 30 days of updates")
    _finish(reconcile, cmd_reconcile)

    render = commands.add_parser("render", help="Render structure and secure-code pages")
    _finish(render, cmd_render)

    validate_parser = commands.add_parser("validate", help="Validate generated content")
    _finish(validate_parser, cmd_validate)

    open_pr_parser = commands.add_parser("open-pr", help="Create Gitea PR from current changes")
    open_pr_parser.add_argument("--base", default="main")
    open_pr_parser.add_argument("--dry-run", action="store_true")
    _finish(open_pr_parser, cmd_open_pr, with_system=False)

    parsed = parser.parse_args()
    return parsed.func(parsed)
# Script entry point: exit the process with main()'s return value as status.
if __name__ == "__main__":
    sys.exit(main())