kb: expand authorized lab coverage and intel automation

这个提交包含在:
hao
2026-03-16 22:04:51 -07:00
父节点 cda31e86c7
当前提交 d0120fbf10
修改 592 个文件，包含 29025 行新增和 267 行删除

1
scripts/intel/__init__.py 普通文件
查看文件

@@ -0,0 +1 @@
"""Websafe threat intelligence automation package."""

45
scripts/intel/config.py 普通文件
查看文件

@@ -0,0 +1,45 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List
import yaml
# Filesystem layout used by the intel tooling. Every repository path below is
# derived from ROOT, which is two directory levels above this file's directory.
ROOT = Path(__file__).resolve().parents[2]
FRAMEWORK_ROOT = ROOT / "07-framework-security"
THREAT_INTEL_ROOT = ROOT / "08-threat-intel"
# Registry tree: one sub-directory each for advisories, systems and triage.
REGISTRY_ROOT = THREAT_INTEL_ROOT / "registry"
ADVISORIES_DIR = REGISTRY_ROOT / "advisories"
SYSTEMS_DIR = REGISTRY_ROOT / "systems"
TRIAGE_DIR = REGISTRY_ROOT / "triage"
GENERATED_DIR = THREAT_INTEL_ROOT / "generated"
SECURE_CODE_ROOT = ROOT / "05-defense" / "secure-code"
# YAML file read by load_source_map().
SOURCE_MAP_PATH = THREAT_INTEL_ROOT / "source-map.yaml"
# Run state is kept under the user's home directory, outside the repository.
STATE_DIR = Path.home() / ".local" / "state" / "websafe-intel"
STATE_PATH = STATE_DIR / "state.json"
def load_source_map() -> Dict[str, Any]:
    """Load ``source-map.yaml`` and check its top-level shape.

    Returns:
        The parsed YAML document (a dict with a ``systems`` list).

    Raises:
        ValueError: If the document is not a mapping, has no ``systems`` key,
            or ``systems`` is not a list.
    """
    with SOURCE_MAP_PATH.open("r", encoding="utf-8") as stream:
        document = yaml.safe_load(stream)

    has_systems_key = isinstance(document, dict) and "systems" in document
    if not has_systems_key:
        raise ValueError("source-map.yaml must contain a top-level 'systems' list")
    if not isinstance(document["systems"], list):
        raise ValueError("'systems' must be a list")
    return document
def get_systems_by_group(source_map: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
    """Bucket the source-map systems by the second component of ``output_dir``.

    Args:
        source_map: Mapping with a ``systems`` list; each entry needs
            ``output_dir`` (and ``system_id`` for the error message).

    Returns:
        Dict from group name to the systems in that group, in input order.

    Raises:
        ValueError: If an ``output_dir`` has fewer than three path components.
    """
    by_group: Dict[str, List[Dict[str, Any]]] = {}
    for entry in source_map["systems"]:
        segments = Path(entry["output_dir"]).parts
        if len(segments) < 3:
            raise ValueError(f"output_dir too short for system {entry['system_id']}")
        group_name = segments[1]
        if group_name not in by_group:
            by_group[group_name] = []
        by_group[group_name].append(entry)
    return by_group

290
scripts/intel/main.py 普通文件
查看文件

@@ -0,0 +1,290 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple
# Put the directory that contains the ``intel`` package on sys.path so the
# ``from intel...`` imports below resolve when this file is executed directly.
CURRENT_DIR = Path(__file__).resolve().parent
SCRIPTS_DIR = CURRENT_DIR.parent
if str(SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPTS_DIR))
from intel.config import ADVISORIES_DIR, GENERATED_DIR, STATE_DIR, STATE_PATH, TRIAGE_DIR, load_source_map # noqa: E402
from intel.models import AdvisoryRecord # noqa: E402
from intel.normalize import normalize_candidates # noqa: E402
from intel.pr import open_pr # noqa: E402
from intel.render import render_case_pages, render_generated, render_registry, render_secure_code, render_system_scaffolding # noqa: E402
from intel.route import route_advisories # noqa: E402
from intel.sources.runner import collect_candidates # noqa: E402
from intel.utils import isoformat, load_all_json, now_utc, parse_since, read_json, write_json # noqa: E402
from intel.validators import validate # noqa: E402
def _load_existing_advisories() -> List[AdvisoryRecord]:
    """Rebuild ``AdvisoryRecord`` objects from the JSON files in ``ADVISORIES_DIR``.

    Payloads that ``AdvisoryRecord(**payload)`` rejects with ``TypeError``
    (for example missing or unexpected keys) are skipped without being reported.
    """
    records: List[AdvisoryRecord] = []
    for payload in load_all_json(ADVISORIES_DIR):
        try:
            record = AdvisoryRecord(**payload)
        except TypeError:
            # Best effort: an incompatible file on disk must not abort the run.
            continue
        records.append(record)
    return records
def _load_existing_triage() -> List[Dict[str, Any]]:
    """Return every JSON payload that ``load_all_json`` yields for ``TRIAGE_DIR``."""
    triage_items = load_all_json(TRIAGE_DIR)
    return triage_items
def _filter_source_map(source_map: Dict[str, Any], system_ids: List[str] | None) -> Dict[str, Any]:
if not system_ids:
return source_map
allowed = set(system_ids)
filtered = [system for system in source_map["systems"] if system["system_id"] in allowed]
found = {system["system_id"] for system in filtered}
missing = sorted(allowed - found)
if missing:
raise ValueError(f"Unknown system_id(s): {', '.join(missing)}")
return {**source_map, "systems": filtered}
def _merge_preserved_records(
    selected_source_map: Dict[str, Any],
    advisories: List[AdvisoryRecord],
    triage: List[Dict[str, Any]],
) -> tuple[List[AdvisoryRecord], List[Dict[str, Any]]]:
    """Prepend on-disk records of systems that were not part of this run.

    Existing advisories and triage entries whose ``system_id`` is outside
    ``selected_source_map`` are kept and placed before the freshly produced
    ``advisories`` / ``triage`` lists.

    Returns:
        ``(all_advisories, all_triage)``.
    """
    in_scope = {entry["system_id"] for entry in selected_source_map["systems"]}

    kept_advisories = []
    for record in _load_existing_advisories():
        if record.system_id not in in_scope:
            kept_advisories.append(record)

    kept_triage = []
    for entry in _load_existing_triage():
        if entry.get("system_id") not in in_scope:
            kept_triage.append(entry)

    return kept_advisories + advisories, kept_triage + triage
def _summarize_changes(advisories: List[AdvisoryRecord]) -> Dict[str, Any]:
new_count = 0
updated_count = 0
touched = set()
for advisory in advisories:
path = ADVISORIES_DIR / f"{advisory.canonical_id}.json"
existing = read_json(path, default=None)
current = advisory.to_dict()
if existing is None:
new_count += 1
touched.add(advisory.system_id)
continue
if existing != current:
updated_count += 1
touched.add(advisory.system_id)
return {
"new_count": new_count,
"updated_count": updated_count,
"systems_touched": sorted(touched),
}
def _select_hotlane(
advisories: List[AdvisoryRecord],
triage: List[Dict[str, Any]],
) -> Tuple[List[AdvisoryRecord], List[Dict[str, Any]]]:
filtered = []
keep_ids = set()
for advisory in advisories:
if advisory.exploit_status in {"known_exploited", "active_exploitation", "in_the_wild"}:
filtered.append(advisory)
keep_ids.add(advisory.canonical_id)
continue
if advisory.cvss_score is not None and advisory.cvss_score >= 8.8:
filtered.append(advisory)
keep_ids.add(advisory.canonical_id)
continue
if advisory.severity == "critical":
filtered.append(advisory)
keep_ids.add(advisory.canonical_id)
filtered_triage = [item for item in triage if item.get("canonical_id") in keep_ids]
return filtered, filtered_triage
def _write_outputs(
    source_map: Dict[str, Any],
    advisories: List[AdvisoryRecord],
    triage: List[Dict[str, Any]],
    failures: List[str],
    change_summary: Dict[str, Any],
) -> None:
    """Invoke every renderer, in a fixed order, for one set of results.

    Args:
        source_map: Source map whose systems should be rendered.
        advisories: Advisory records to write.
        triage: Triage entries to write.
        failures: Failure messages forwarded to ``render_generated``.
        change_summary: Change counters forwarded to ``render_generated``.
    """
    # The registry is written first; the Markdown renderers follow.
    render_registry(source_map, advisories, triage)
    render_system_scaffolding(source_map, advisories)
    render_case_pages(advisories)
    render_secure_code(source_map)
    render_generated(source_map, advisories, triage, failures, change_summary)
def pipeline(
    full_source_map: Dict[str, Any],
    source_map: Dict[str, Any],
    since_arg: str,
    tier: str | None,
    include_undated: bool,
    hotlane_only: bool = False,
) -> tuple[list[AdvisoryRecord], list[Dict[str, Any]], list[str], Dict[str, Any]]:
    """Collect, normalize, route and render advisories for the selected systems.

    Args:
        full_source_map: The unfiltered source map.
        source_map: The (possibly filtered) source map to collect from.
        since_arg: Lower time bound handed to ``parse_since``; ignored when
            ``tier`` is ``"history-full"``.
        tier: Collection tier passed to ``collect_candidates``.
        include_undated: Passed through to ``collect_candidates``.
        hotlane_only: Keep only advisories accepted by ``_select_hotlane``.

    Returns:
        ``(advisories, triage, failures, change_summary)``. For a filtered run
        the first two also contain the preserved records of other systems.

    NOTE(review): ``render_registry`` empties the advisory directory before
    writing, so advisories of the selected systems that this run did not return
    are removed from the registry. Confirm this is intended for incremental and
    hotlane runs.
    """
    if tier == "history-full":
        since_dt = None
    else:
        since_dt = parse_since(since_arg, default_days=30)

    candidates, failures = collect_candidates(
        source_map,
        since_dt=since_dt,
        tier=tier,
        include_undated=include_undated,
    )
    advisories, triage = normalize_candidates(candidates)
    advisories = route_advisories(source_map, advisories)
    if hotlane_only:
        advisories, triage = _select_hotlane(advisories, triage)

    # Summarize before merging so only this run's advisories are counted.
    change_summary = _summarize_changes(advisories)

    is_partial_run = len(source_map["systems"]) != len(full_source_map["systems"])
    if is_partial_run:
        # Keep what is on disk for systems that were filtered out of this run.
        advisories, triage = _merge_preserved_records(source_map, advisories, triage)
    render_map = full_source_map if is_partial_run else source_map

    _write_outputs(render_map, advisories, triage, failures, change_summary)
    return advisories, triage, failures, change_summary
def cmd_render(args) -> int:
    """Re-render all outputs from the records already stored in the registry.

    Failures and the change summary are taken from the previous
    ``run-summary.json`` in ``GENERATED_DIR`` (empty when it is absent).

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    in_scope = {entry["system_id"] for entry in source_map["systems"]}

    advisories = [record for record in _load_existing_advisories() if record.system_id in in_scope]
    triage = [entry for entry in _load_existing_triage() if entry.get("system_id") in in_scope]

    previous_summary = read_json(GENERATED_DIR / "run-summary.json", default={}) or {}
    failures = previous_summary.get("failures", [])

    is_partial_run = len(source_map["systems"]) != len(full_source_map["systems"])
    if is_partial_run:
        advisories, triage = _merge_preserved_records(source_map, advisories, triage)
    render_map = full_source_map if is_partial_run else source_map

    _write_outputs(render_map, advisories, triage, failures, previous_summary)
    return 0
def cmd_validate(args) -> int:
    """Run ``validate`` on the selected systems and print the outcome.

    Returns:
        ``1`` when ``validate`` reports errors, otherwise ``0``.
    """
    selected_map = _filter_source_map(load_source_map(), args.system)
    problems = validate(selected_map)
    if not problems:
        print("Validation passed.")
        return 0

    print("Validation failed:")
    for problem in problems:
        print(f"- {problem}")
    return 1
def _write_state(status: str) -> None:
    """Persist the outcome of a run to ``STATE_PATH``.

    ``last_success`` is only advanced when ``status`` is ``"success"``; any
    other status keeps the previous timestamp. ``cmd_ingest`` reads
    ``last_success`` as its default lower time bound, so it must not move
    forward after a run that did not succeed.

    Args:
        status: Status label stored under the ``status`` key.
    """
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    state = read_json(STATE_PATH, default={}) or {}
    if status == "success":
        state["last_success"] = isoformat(now_utc())
    state["status"] = status
    write_json(STATE_PATH, state)
def cmd_ingest(args) -> int:
    """Run an incremental ingest and record the run as successful.

    ``--since last-success`` is resolved to the ``last_success`` value stored
    in ``STATE_PATH``, falling back to ``"30d"`` when none is stored.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)

    window = args.since
    if window == "last-success":
        persisted = read_json(STATE_PATH, default={}) or {}
        window = persisted.get("last_success", "30d")

    advisories, triage, failures, summary = pipeline(
        full_source_map,
        source_map,
        window,
        None,
        include_undated=False,
    )
    _write_state("success")
    print(
        f"Ingested {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    )
    return 0
def cmd_hotlane(args) -> int:
    """Sync only hot advisories from the last day and record success.

    NOTE(review): this also advances ``last_success``, which ``cmd_ingest``
    uses as its default lower bound. Confirm that non-hot advisories published
    before a hotlane run are still picked up elsewhere (e.g. by ``reconcile``).

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    advisories, triage, failures, summary = pipeline(
        full_source_map,
        source_map,
        "1d",
        None,
        include_undated=False,
        hotlane_only=True,
    )
    _write_state("success")
    print(
        f"Hotlane synced {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    )
    return 0
def cmd_reconcile(args) -> int:
    """Re-run the pipeline over a fixed ``"30d"`` window and record success.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    advisories, triage, failures, summary = pipeline(
        full_source_map,
        source_map,
        "30d",
        None,
        include_undated=False,
    )
    _write_state("success")
    print(
        f"Reconciled {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    )
    return 0
def cmd_backfill(args) -> int:
    """Fetch historical advisories for ``--tier``, optionally as a dry run.

    A dry run collects, normalizes and routes candidates and prints the counts
    without calling the renderers. A real run goes through ``pipeline``. Neither
    path writes the run state.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)

    if not args.dry_run:
        advisories, triage, failures, summary = pipeline(
            full_source_map,
            source_map,
            "",
            args.tier,
            include_undated=True,
            hotlane_only=args.hotlane_only,
        )
        print(
            f"Backfilled {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
        )
        return 0

    # Dry run: same collection steps as the pipeline, but nothing is rendered.
    candidates, failures = collect_candidates(source_map, since_dt=None, tier=args.tier, include_undated=True)
    advisories, triage = normalize_candidates(candidates)
    advisories = route_advisories(source_map, advisories)
    if args.hotlane_only:
        advisories, triage = _select_hotlane(advisories, triage)
    print(
        f"Dry run backfill tier={args.tier}: candidates={len(candidates)} advisories={len(advisories)} triage={len(triage)} failures={len(failures)}"
    )
    return 0
def cmd_open_pr(args) -> int:
    """Call ``open_pr`` with the CLI options and print the message it returns.

    Returns:
        Always ``0``.
    """
    outcome = open_pr(base_branch=args.base, dry_run=args.dry_run)
    print(outcome)
    return 0
def main() -> int:
    """Parse the command line and dispatch to the chosen sub-command handler.

    Returns:
        The integer returned by the selected ``cmd_*`` function.
    """
    parser = argparse.ArgumentParser(description="Websafe threat intel automation")
    commands = parser.add_subparsers(dest="command", required=True)

    backfill_cmd = commands.add_parser("backfill", help="Fetch historical advisories")
    backfill_cmd.add_argument("--tier", choices=["history-full", "rolling-24m"], required=True)
    backfill_cmd.add_argument("--dry-run", action="store_true")
    backfill_cmd.add_argument("--hotlane-only", action="store_true")
    backfill_cmd.add_argument("--system", action="append")
    backfill_cmd.set_defaults(func=cmd_backfill)

    ingest_cmd = commands.add_parser("ingest", help="Fetch incremental advisories")
    ingest_cmd.add_argument("--since", default="last-success")
    ingest_cmd.add_argument("--system", action="append")
    ingest_cmd.set_defaults(func=cmd_ingest)

    # These commands take nothing but a repeatable --system filter.
    system_only_commands = (
        ("hotlane", "Fetch only KEV / in-the-wild / critical updates", cmd_hotlane),
        ("reconcile", "Reconcile the last 30 days of updates", cmd_reconcile),
        ("render", "Render structure and secure-code pages", cmd_render),
        ("validate", "Validate generated content", cmd_validate),
    )
    for command_name, help_text, handler in system_only_commands:
        sub = commands.add_parser(command_name, help=help_text)
        sub.add_argument("--system", action="append")
        sub.set_defaults(func=handler)

    open_pr_cmd = commands.add_parser("open-pr", help="Create Gitea PR from current changes")
    open_pr_cmd.add_argument("--base", default="main")
    open_pr_cmd.add_argument("--dry-run", action="store_true")
    open_pr_cmd.set_defaults(func=cmd_open_pr)

    parsed = parser.parse_args()
    return parsed.func(parsed)
# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())

70
scripts/intel/models.py 普通文件
查看文件

@@ -0,0 +1,70 @@
from __future__ import annotations
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class Candidate:
    """One raw advisory sighting reported by a single source.

    The first nine fields are required; the rest default to "unknown"/empty.
    """

    # Which system the sighting belongs to.
    system_id: str
    display_name: str
    category: str
    advisory_mode: str

    # Where the sighting came from.
    source_kind: str
    source_name: str
    source_confidence: str
    source_url: str

    # Advisory content.
    title: str
    published_at: Optional[str] = None
    updated_at: Optional[str] = None
    summary: str = ""
    severity: str = "unknown"
    cvss_score: Optional[float] = None
    exploit_status: str = "unknown"

    # Identifiers under which the advisory is known.
    aliases: List[str] = field(default_factory=list)
    cve_ids: List[str] = field(default_factory=list)
    ghsa_ids: List[str] = field(default_factory=list)
    osv_ids: List[str] = field(default_factory=list)

    # Version and package details.
    affected_versions: List[str] = field(default_factory=list)
    fixed_versions: List[str] = field(default_factory=list)
    package_name: Optional[str] = None

    references: List[str] = field(default_factory=list)
    raw: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the candidate as a plain dict via ``dataclasses.asdict``."""
        return asdict(self)
@dataclass
class AdvisoryRecord:
    """A merged advisory as it is written to the registry.

    Every field up to ``status`` is required; ``triage_reasons`` and
    ``metadata`` default to empty containers.
    """

    # Identity.
    canonical_id: str
    system_id: str
    display_name: str
    category: str
    advisory_mode: str

    # Content.
    title: str
    summary: str
    published_at: Optional[str]
    updated_at: Optional[str]
    severity: str
    cvss_score: Optional[float]
    exploit_status: str

    # Provenance.
    source_confidence: str
    official_source_url: Optional[str]
    secondary_source_urls: List[str]

    # Identifiers.
    aliases: List[str]
    cve_ids: List[str]
    ghsa_ids: List[str]
    osv_ids: List[str]

    # Version and package details.
    affected_versions: List[str]
    fixed_versions: List[str]
    package_name: Optional[str]

    # Rendering and workflow state.
    render_markdown: bool
    case_path: Optional[str]
    secure_code_topics: List[str]
    status: str
    triage_reasons: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict via ``dataclasses.asdict``."""
        return asdict(self)

126
scripts/intel/normalize.py 普通文件
查看文件

@@ -0,0 +1,126 @@
from __future__ import annotations
from collections import defaultdict
from typing import Any, Dict, Iterable, List, Tuple
from intel.models import AdvisoryRecord, Candidate
from intel.utils import best_severity, short_hash, unique
# Rank of each source-confidence label; a higher number wins when candidates
# are merged. Labels missing from this table are ranked 0 by the lookups below.
CONFIDENCE_ORDER = {
    "official": 4,
    "ecosystem-authority": 3,
    "research": 2,
    "triage-only": 1,
}
def _best_confidence(values: Iterable[str]) -> str:
ordered = sorted(values, key=lambda value: CONFIDENCE_ORDER.get(value, 0), reverse=True)
return next((value for value in ordered if value), "triage-only")
def canonical_key(candidate: Candidate) -> str:
    """Build the bucket key ``<system_id>::<identifier>`` for a candidate.

    The identifier is the first non-empty entry of ``cve_ids``, ``ghsa_ids``,
    ``osv_ids`` and ``aliases``, searched in that order. Without any identifier
    the key uses ``short_hash(title, source_url)`` instead.
    """
    identifiers = candidate.cve_ids + candidate.ghsa_ids + candidate.osv_ids + candidate.aliases
    chosen = next((identifier for identifier in identifiers if identifier), None)
    if chosen is None:
        chosen = short_hash(candidate.title, candidate.source_url)
    return f"{candidate.system_id}::{chosen}"
def normalize_candidates(candidates: List[Candidate]) -> Tuple[List[AdvisoryRecord], List[Dict[str, Any]]]:
    """Merge raw candidates into one advisory per canonical key.

    Candidates are bucketed by ``canonical_key``. Each bucket yields one
    ``AdvisoryRecord`` whose descriptive fields come from the candidate with the
    highest source confidence (the "lead") and whose list fields are the union
    across the bucket. A bucket gets status ``"triage"`` and a triage entry
    when its best confidence is below ``ecosystem-authority``, when it has no
    official/ecosystem-authority URL, or when it has neither affected nor fixed
    versions.

    Args:
        candidates: Raw candidates, in source order.

    Returns:
        ``(advisories, triage)``, both ordered by canonical key.
    """
    trusted_levels = {"official", "ecosystem-authority"}

    buckets: Dict[str, List[Candidate]] = defaultdict(list)
    for candidate in candidates:
        buckets[canonical_key(candidate)].append(candidate)

    advisories: List[AdvisoryRecord] = []
    triage: List[Dict[str, Any]] = []
    for key, items in sorted(buckets.items()):
        canonical_id = key.replace("::", "--")
        # max() keeps the first of several equally ranked candidates, like the
        # stable descending sort it replaces.
        lead = max(items, key=lambda item: CONFIDENCE_ORDER.get(item.source_confidence, 0))
        confidence = _best_confidence(item.source_confidence for item in items)

        aliases = unique(alias for item in items for alias in item.aliases)
        cve_ids = unique(value for item in items for value in item.cve_ids)
        ghsa_ids = unique(value for item in items for value in item.ghsa_ids)
        osv_ids = unique(value for item in items for value in item.osv_ids)
        affected = unique(value for item in items for value in item.affected_versions)
        fixed = unique(value for item in items for value in item.fixed_versions)
        references = unique([item.source_url for item in items] + [ref for item in items for ref in item.references])

        # Scalar fields: the first candidate that provides a value wins.
        published = next((item.published_at for item in items if item.published_at), None)
        updated = next((item.updated_at for item in items if item.updated_at), published)
        severity = best_severity(item.severity for item in items)
        cvss = next((item.cvss_score for item in items if item.cvss_score is not None), None)
        exploit_status = next(
            (item.exploit_status for item in items if item.exploit_status and item.exploit_status != "unknown"),
            "unknown",
        )

        official_refs = [
            item.source_url
            for item in items
            if item.source_confidence in trusted_levels and item.source_url
        ]
        primary_url = official_refs[0] if official_refs else (references[0] if references else None)
        # Every reference except the primary one. Slicing references[1:] lost
        # references[0] and repeated the primary URL whenever the official
        # source was not the first candidate in the bucket.
        secondary_urls = [ref for ref in references if ref != primary_url]

        triage_reasons = []
        if confidence not in trusted_levels:
            triage_reasons.append("best source confidence below registry threshold")
        if not official_refs:
            triage_reasons.append("no official or ecosystem-authority source URL")
        if not (affected or fixed):
            triage_reasons.append("missing affected/fixed version details")

        status = "generated"
        if triage_reasons:
            status = "triage"
            triage.append(
                {
                    "canonical_id": canonical_id,
                    "system_id": lead.system_id,
                    "title": lead.title,
                    "reasons": triage_reasons,
                    "candidate_count": len(items),
                    "references": references,
                }
            )

        advisories.append(
            AdvisoryRecord(
                canonical_id=canonical_id,
                system_id=lead.system_id,
                display_name=lead.display_name,
                category=lead.category,
                advisory_mode=lead.advisory_mode,
                title=lead.title,
                summary=lead.summary,
                published_at=published,
                updated_at=updated,
                severity=severity,
                cvss_score=cvss,
                exploit_status=exploit_status,
                source_confidence=confidence,
                official_source_url=primary_url,
                secondary_source_urls=secondary_urls,
                aliases=aliases,
                cve_ids=cve_ids,
                ghsa_ids=ghsa_ids,
                osv_ids=osv_ids,
                affected_versions=affected,
                fixed_versions=fixed,
                package_name=lead.package_name,
                # Rendering fields start unset here; this function never fills them.
                render_markdown=False,
                case_path=None,
                secure_code_topics=[],
                status=status,
                triage_reasons=triage_reasons,
                metadata={
                    "source_names": unique(item.source_name for item in items),
                    "source_kinds": unique(item.source_kind for item in items),
                    "candidate_count": len(items),
                },
            )
        )
    return advisories, triage

98
scripts/intel/pr.py 普通文件
查看文件

@@ -0,0 +1,98 @@
from __future__ import annotations
import os
import re
from datetime import datetime
from typing import Optional
import requests
from intel.config import GENERATED_DIR, ROOT
from intel.utils import read_json, run
# Repository-relative paths that the PR automation inspects with `git status`
# and stages with `git add`; changes outside these paths are left alone.
PR_PATHS = [
    "README.md",
    "05-defense/secure-code",
    "07-framework-security",
    "08-threat-intel",
    "requirements-intel.txt",
    "scripts/intel",
]
def create_branch_name() -> str:
    """Return ``codex/intel-YYYYMMDD-HHMM`` built from the current local time."""
    stamp = format(datetime.now(), "%Y%m%d-%H%M")
    return f"codex/intel-{stamp}"
def _parse_origin() -> Optional[dict]:
    """Split the ``origin`` remote URL into host, owner and repository.

    Only ``https://<host>/<owner>/<repo>[.git][/]`` URLs are recognised. The
    repository name may contain dots (``my.repo.git`` yields ``my.repo``), and
    URLs with additional path segments are rejected rather than mis-parsed.

    Returns:
        Dict with ``host``, ``owner``, ``repo`` and the raw ``url``, or ``None``
        when git fails or the URL does not match.
    """
    result = run(["git", "-C", str(ROOT), "remote", "get-url", "origin"], check=False)
    if result.returncode != 0:
        return None
    url = result.stdout.strip()
    # The lazy repo group plus the optional ".git" suffix lets dotted names
    # through; fullmatch() anchors the pattern to the whole URL.
    match = re.fullmatch(r"https://([^/]+)/([^/]+)/([^/]+?)(?:\.git)?/?", url)
    if not match:
        return None
    host, owner, repo = match.groups()
    return {"host": host, "owner": owner, "repo": repo, "url": url}
def _changed_paths() -> list[str]:
    """Return the non-blank ``git status --short`` lines limited to ``PR_PATHS``.

    Trailing whitespace is stripped from each line; the leading status columns
    are kept.
    """
    status = run(["git", "-C", str(ROOT), "status", "--short", "--", *PR_PATHS], check=False)
    entries = []
    for raw_line in status.stdout.splitlines():
        if raw_line.strip():
            entries.append(raw_line.rstrip())
    return entries
def open_pr(base_branch: str = "main", dry_run: bool = False) -> str:
    """Commit the intel changes on a new branch, push it and open a Gitea PR.

    Args:
        base_branch: Branch the pull request targets.
        dry_run: When true, only report the branch name and up to 40 changed
            paths; nothing is committed or pushed.

    Returns:
        A human-readable message describing what happened.

    Raises:
        RuntimeError: If the origin remote URL cannot be parsed.
        requests.HTTPError: If the Gitea API answers with an error status.
    """
    origin = _parse_origin()
    if not origin:
        raise RuntimeError("Unable to parse origin remote URL")

    changed = _changed_paths()
    if not changed:
        return "No intel-related changes to submit"

    branch = create_branch_name()
    if dry_run:
        preview = "\n".join(f"- {line}" for line in changed[:40])
        return f"Dry run only; would create branch {branch} with these paths:\n{preview}"

    git_prefix = ["git", "-C", str(ROOT)]
    git_steps = (
        ["checkout", "-b", branch],
        ["add", "--", *PR_PATHS],
        ["commit", "-m", f"intel: automated advisory ingest {branch}"],
        ["push", "-u", "origin", branch],
    )
    for step in git_steps:
        run(git_prefix + step)

    token = os.environ.get("GITEA_TOKEN")
    if not token:
        # The branch is already pushed; without a token only the PR is skipped.
        return f"Pushed branch {branch}, but GITEA_TOKEN is not set; PR not created"

    summary = read_json(GENERATED_DIR / "run-summary.json", default={}) or {}
    body_lines = ["Automated advisory ingest update.", ""]
    body_lines.append(f"- New advisories: {summary.get('new_count', 0)}")
    body_lines.append(f"- Updated advisories: {summary.get('updated_count', 0)}")
    body_lines.append(f"- Triage count: {summary.get('triage_count', 0)}")
    body_lines.append(f"- Failure count: {len(summary.get('failures', []))}")
    if summary.get("systems_touched"):
        body_lines.append(f"- Systems touched: {', '.join(summary['systems_touched'])}")
    if summary.get("failures"):
        body_lines.extend(["", "Failed source adapters:"])
        body_lines.extend(f"- {failure}" for failure in summary["failures"])

    endpoint = f"https://{origin['host']}/api/v1/repos/{origin['owner']}/{origin['repo']}/pulls"
    response = requests.post(
        endpoint,
        headers={"Authorization": f"token {token}", "Content-Type": "application/json"},
        json={
            "title": f"Intel ingest {branch}",
            "head": branch,
            "base": base_branch,
            "body": "\n".join(body_lines),
        },
        timeout=30,
    )
    response.raise_for_status()
    created = response.json()
    pr_url = created.get("html_url") or created.get("url")
    return f"Created PR: {pr_url}"

547
scripts/intel/render.py 普通文件
查看文件

@@ -0,0 +1,547 @@
from __future__ import annotations
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List
from intel.config import FRAMEWORK_ROOT, GENERATED_DIR, REGISTRY_ROOT, ROOT, SECURE_CODE_ROOT, SYSTEMS_DIR, TRIAGE_DIR
from intel.models import AdvisoryRecord
from intel.utils import ensure_dir, isoformat, now_utc, write_json, write_text
# Shorthand for the UTC timezone used when comparing timestamps.
UTC = timezone.utc
# Language directory names looked up under SECURE_CODE_ROOT when case pages
# link to secure-code examples.
LANGUAGES = [
    "javascript-typescript",
    "nodejs",
    "java",
    "php",
    "python",
    "ruby",
    "csharp",
    "go",
]
# One-sentence (Chinese) description of each secure-code topic, keyed by topic
# slug. The code that consumes this table is outside this chunk.
TOPIC_DESCRIPTIONS = {
    "xss-output-encoding": "对不可信输入做上下文输出编码,避免 HTML、属性和脚本上下文执行。",
    "dom-sink-hardening": "限制 `innerHTML`、模板拼接和 DOM sink 的直接写入。",
    "csp-trusted-types": "用 CSP 和 Trusted Types 缩小前端执行面。",
    "token-cookie-storage": "避免把敏感令牌长期暴露在可读浏览器存储中。",
    "authz-server-side-recheck": "前置代理或 middleware 不能替代服务端最终鉴权。",
    "ssrf-url-validation": "对 URL、协议、IP 和重定向做 allowlist 校验。",
    "request-smuggling-boundary": "统一代理层和应用层的请求边界解释。",
    "path-traversal-guard": "标准化路径并限制访问根目录。",
    "file-upload-validation": "校验文件类型、名称、存储位置和执行权限。",
    "plugin-extension-trust-policy": "限制第三方插件、模块和主题的信任边界。",
    "dependency-upgrade-policy": "用自动化升级、锁版本和审查策略降低供应链风险。",
    "proxy-trust-boundary": "只信任明确代理并限制头部透传。",
    "deserialization-safety": "避免对不可信数据做危险反序列化。",
    "template-injection-guard": "模板上下文中禁用危险表达式执行。",
}
# Typical application scenarios (Chinese) for each secure-code topic; uses the
# same topic slugs as TOPIC_DESCRIPTIONS.
TOPIC_SCENARIOS = {
    "xss-output-encoding": "适用于模板输出、服务端渲染片段和后台管理界面回显场景。",
    "dom-sink-hardening": "适用于前端模板拼接、Markdown 渲染器和富文本预览逻辑。",
    "csp-trusted-types": "适用于高风险前端应用、管理端和需要限制脚本执行面的页面。",
    "token-cookie-storage": "适用于浏览器端会话、管理接口令牌和单页应用认证态。",
    "authz-server-side-recheck": "适用于代理层放行、路由守卫和后端最终授权重新确认。",
    "ssrf-url-validation": "适用于 webhook、URL 导入、远程图片抓取和插件联动调用。",
    "request-smuggling-boundary": "适用于代理链、WAF、CDN 和应用服务器之间的请求解析边界。",
    "path-traversal-guard": "适用于下载、导入、附件预览和主题/模板读取路径。",
    "file-upload-validation": "适用于媒体上传、插件安装、主题导入和日志附件接收。",
    "plugin-extension-trust-policy": "适用于插件市场、主题仓库、第三方扩展和模块化系统。",
    "dependency-upgrade-policy": "适用于 lockfile、SBOM、CI 审查和供应链更新节奏治理。",
    "proxy-trust-boundary": "适用于真实 IP 透传、认证头转发和反向代理旁路风险。",
    "deserialization-safety": "适用于缓存、任务队列、对象恢复和跨服务消息传递。",
    "template-injection-guard": "适用于 SSR、模板引擎、邮件渲染和后台自定义视图。",
}
# Per-language pair of one-line code samples: index 0 writes the value into
# HTML unescaped, index 1 is the variant that escapes it. Keys match LANGUAGES.
BAD_GOOD_SNIPPETS = {
    "javascript-typescript": (
        "const output = `<div>${userInput}</div>`;",
        "const output = `<div>${escapeHtml(userInput)}</div>`;",
    ),
    "nodejs": (
        "res.send(`<div>${req.query.q}</div>`);",
        "res.send(`<div>${escapeHtml(req.query.q)}</div>`);",
    ),
    "java": (
        "response.getWriter().write(\"<div>\" + value + \"</div>\");",
        "response.getWriter().write(\"<div>\" + HtmlUtils.htmlEscape(value) + \"</div>\");",
    ),
    "php": (
        "echo \"<div>{$value}</div>\";",
        "echo '<div>' . htmlspecialchars($value, ENT_QUOTES, 'UTF-8') . '</div>';",
    ),
    "python": (
        "return f\"<div>{value}</div>\"",
        "return f\"<div>{escape(value)}</div>\"",
    ),
    "ruby": (
        "render inline: \"<div>#{value}</div>\"",
        "render inline: \"<div>#{ERB::Util.html_escape(value)}</div>\"",
    ),
    "csharp": (
        "return Content($\"<div>{value}</div>\", \"text/html\");",
        "return Content($\"<div>{HtmlEncoder.Default.Encode(value)}</div>\", \"text/html\");",
    ),
    "go": (
        "fmt.Fprintf(w, \"<div>%s</div>\", value)",
        "template.HTMLEscape(w, []byte(value))",
    ),
}
# Fallback link per source kind, used by _source_reference() when a source
# entry carries no URL of its own.
SOURCE_KIND_URLS = {
    "ghsa-global": "https://github.com/advisories",
    "osv-batch": "https://osv.dev/",
    "nvd-search": "https://nvd.nist.gov/vuln/search",
    "kev-json": "https://www.cisa.gov/known-exploited-vulnerabilities-catalog",
    "rss-feed": "https://www.rssboard.org/rss-specification",
}
# Target categories listed in the generated pages.
TARGET_TYPES = ["lab-local", "lab-public", "authorized-third-party"]
# Chinese guidance sentence on minimal validation, embedded verbatim in pages.
MINIMAL_VALIDATION_GUIDANCE = "最小化验证、只读探测、可审计回显、受控注入。"
# Forbidden scenarios (Chinese) listed in the generated pages.
FORBIDDEN_SCENARIOS = [
    "无归属证明或无明确授权的公网目标",
    "知名公共网站或与测试无关的第三方资产",
    "会造成持久破坏、数据越权下载或不可回滚影响的动作",
]
def _recent_count(items: Iterable[AdvisoryRecord], days: int = 30) -> int:
    """Count advisories updated or published within the last ``days`` days.

    For each advisory ``updated_at`` is checked first, then ``published_at``;
    the advisory is counted once as soon as one parseable timestamp falls
    inside the window. Empty or unparseable timestamps are ignored.

    Timestamps without a UTC offset are interpreted as UTC. Passing a naive
    value to ``astimezone`` would interpret it in the machine's local timezone
    and make the count depend on where the tool runs.

    Args:
        items: Advisory records to inspect.
        days: Size of the look-back window in days.

    Returns:
        Number of advisories with a timestamp at or after the cutoff.
    """
    cutoff = now_utc() - timedelta(days=days)
    total = 0
    for item in items:
        for stamp in (item.updated_at, item.published_at):
            if not stamp:
                continue
            try:
                parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
            except ValueError:
                continue
            if parsed.tzinfo is None:
                # Naive timestamp: pin it to UTC rather than local time.
                parsed = parsed.replace(tzinfo=UTC)
            if parsed.astimezone(UTC) >= cutoff:
                total += 1
                break
    return total
def _group_name(output_dir: str) -> str:
return Path(output_dir).parts[1]
def _abs_repo_path(*parts: str) -> str:
    """Join path fragments under ``ROOT`` and return the result as a string.

    Empty or ``None`` fragments are skipped; each remaining fragment is split
    into its own path components before joining.
    """
    segments: List[str] = [
        component
        for fragment in parts
        if fragment
        for component in Path(fragment).parts
    ]
    return str(ROOT.joinpath(*segments))
def _source_reference(source: Dict[str, Any]) -> str:
url = source.get("url") or SOURCE_KIND_URLS.get(source.get("kind", ""))
qualifiers = []
if source.get("ecosystem"):
qualifiers.append(f"ecosystem={source['ecosystem']}")
if source.get("keyword"):
qualifiers.append(f"keyword={source['keyword']}")
if source.get("advisory_mode"):
qualifiers.append(f"mode={source['advisory_mode']}")
suffix = f" ({'; '.join(qualifiers)})" if qualifiers else ""
if url:
return f"`{source['confidence']}` [{source['name']}]({url}){suffix}"
return f"`{source['confidence']}` {source['name']}{suffix}"
def _clear_json_dir(path: Path) -> None:
    """Call ``ensure_dir`` on ``path`` and delete the ``*.json`` files directly in it."""
    ensure_dir(path)
    stale_files = path.glob("*.json")
    for stale in stale_files:
        stale.unlink()
def render_system_scaffolding(source_map: Dict[str, Any], advisories: List[AdvisoryRecord]) -> None:
    """Write the per-system, per-group and root Markdown pages.

    For every system in ``source_map`` this creates ``<system>/cases`` and
    writes ``INDEX.md`` (statistics, sources, advisory table) and ``README.md``.
    It then writes one ``README.md`` per group and the root ``README.md`` of
    ``FRAMEWORK_ROOT``.

    Args:
        source_map: Source map whose ``systems`` are rendered.
        advisories: Advisory records; they are matched to systems by ``system_id``.
    """
    # Separate the scenarios with a full-width semicolon; joining with an empty
    # string ran them together into one unreadable sentence.
    forbidden_text = "；".join(FORBIDDEN_SCENARIOS)
    target_types_text = ", ".join(TARGET_TYPES)

    grouped: Dict[str, List[AdvisoryRecord]] = defaultdict(list)
    for advisory in advisories:
        grouped[advisory.system_id].append(advisory)

    groups: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for system in source_map["systems"]:
        group = _group_name(system["output_dir"])
        groups[group].append(system)
        system_dir = FRAMEWORK_ROOT / group / system["system_id"]
        ensure_dir(system_dir / "cases")

        # Newest first; advisories without a publish date sort last.
        items = sorted(grouped.get(system["system_id"], []), key=lambda item: item.published_at or "", reverse=True)
        markdown_count = sum(1 for item in items if item.render_markdown and item.case_path)

        index_lines = [
            f"# {system['display_name']}",
            "",
            "> `LAB ONLY` | `AUTHORIZED TARGETS ONLY` | 自动生成索引",
            "",
            f"- 系统 ID: `{system['system_id']}`",
            f"- 分类: `{system['category']}`",
            f"- 覆盖策略: `{system['tier']}`",
            f"- 总案例数: `{len(items)}`",
            f"- 近 30 天新增/更新: `{_recent_count(items)}`",
            f"- 重点 Markdown 案例数: `{markdown_count}`",
            f"- 最近渲染时间: `{isoformat(now_utc())}`",
            "",
            "## 目标约束",
            "",
            f"- 适用目标类型: `{target_types_text}`",
            "- 是否允许公网验证: `yes, but ownership or authorization is required`",
            "- 授权前提: 资产归属可证明,或已取得书面/明确授权。",
            f"- 最小化验证方式: {MINIMAL_VALIDATION_GUIDANCE}",
            f"- 禁止场景: {forbidden_text}",
            "",
            "## 来源",
            "",
        ]
        for bucket in ("official_sources", "ecosystem_sources", "research_sources"):
            for source in system.get(bucket, []):
                index_lines.append(f"- {_source_reference(source)}")
        index_lines.extend(
            [
                "",
                "## 案例列表",
                "",
                "| 标题 | 严重度 | 状态 | 来源置信度 | 更新时间 | 案例页 |",
                "|------|--------|------|------------|----------|--------|",
            ]
        )
        if items:
            for item in items:
                case_link = f"[link]({_abs_repo_path(item.case_path)})" if item.case_path else "-"
                timestamp = item.updated_at or item.published_at or ""
                # Titles come from external feeds: a raw "|" or newline would
                # break the Markdown table row.
                title_cell = item.title.replace("\n", " ").replace("|", "\\|")
                index_lines.append(
                    f"| {title_cell} | `{item.severity}` | `{item.status}` | `{item.source_confidence}` | `{timestamp}` | {case_link} |"
                )
        else:
            index_lines.append("| No advisories yet | `n/a` | `empty` | `n/a` | `n/a` | - |")
        write_text(system_dir / "INDEX.md", "\n".join(index_lines))

        system_registry_path = _abs_repo_path("08-threat-intel", "registry", "systems", f"{system['system_id']}.json")
        readme_lines = [
            f"# {system['display_name']}",
            "",
            "> `LAB ONLY` | `AUTHORIZED TARGETS ONLY`",
            "",
            f"- 分类: `{system['category']}`",
            f"- 覆盖层级: `{system['tier']}`",
            f"- Advisory 模式: {', '.join(system.get('advisory_modes', []))}",
            f"- 输出目录: `{system['output_dir']}`",
            f"- 修复主题: {', '.join(system.get('secure_code_topics', []))}",
            f"- 适用目标类型: `{target_types_text}`",
            "- 是否允许公网验证: `yes, but only for owned or authorized targets`",
            f"- 最小化验证方式: {MINIMAL_VALIDATION_GUIDANCE}",
            f"- 禁止场景: {forbidden_text}",
            "",
            f"- 自动索引: [INDEX.md]({_abs_repo_path(system['output_dir'], 'INDEX.md')})",
            f"- Registry 统计: [{system['system_id']}.json]({system_registry_path})",
        ]
        write_text(system_dir / "README.md", "\n".join(readme_lines))

    # One README per group, listing its systems alphabetically by display name.
    allowed_scope = ", ".join(f"`{target}`" for target in TARGET_TYPES)
    for group, systems in groups.items():
        lines = [
            f"# {group}",
            "",
            "> 自动生成系统分组索引",
            "",
            f"- 系统数量: `{len(systems)}`",
            f"- 允许范围: {allowed_scope}",
            "",
        ]
        for system in sorted(systems, key=lambda item: item["display_name"].lower()):
            lines.append(f"- [{system['display_name']}]({_abs_repo_path(system['output_dir'], 'README.md')})")
        write_text(FRAMEWORK_ROOT / group / "README.md", "\n".join(lines))

    # Root README with a fixed list of group links.
    root_lines = [
        "# 主流开源 Web 系统安全",
        "",
        "> `LAB ONLY` | `AUTHORIZED TARGETS ONLY`",
        "",
        f"- 系统总数: `{len(source_map['systems'])}`",
        "- 覆盖语境: 授权攻防实验、验证性注入、最小化验证、案例映射。",
        "- 不适用: 未授权公网目标、泛互联网枚举、默认生产推荐基线。",
        "",
        f"- [cms]({_abs_repo_path('07-framework-security', 'cms', 'README.md')})",
        f"- [ecommerce]({_abs_repo_path('07-framework-security', 'ecommerce', 'README.md')})",
        f"- [frameworks]({_abs_repo_path('07-framework-security', 'frameworks', 'README.md')})",
        f"- [servers]({_abs_repo_path('07-framework-security', 'servers', 'README.md')})",
        f"- [platforms]({_abs_repo_path('07-framework-security', 'platforms', 'README.md')})",
    ]
    write_text(FRAMEWORK_ROOT / "README.md", "\n".join(root_lines))
def render_case_pages(advisories: List[AdvisoryRecord]) -> None:
    """Write one Markdown case page per advisory that is flagged for rendering.

    Advisories without ``render_markdown`` or without ``case_path`` are
    skipped. Each page is a YAML front-matter block followed by the event,
    other-sources, lab and remediation sections, written to
    ``ROOT / case_path``.

    Args:
        advisories: Advisory records to consider.
    """
    # Separate the scenarios with a full-width semicolon; joining with an empty
    # string ran them together into one unreadable sentence.
    forbidden_text = "；".join(FORBIDDEN_SCENARIOS)

    for item in advisories:
        if not item.render_markdown or not item.case_path:
            continue

        # Double quotes would end the quoted front-matter value early.
        safe_title = item.title.replace('"', "'")
        lines = [
            "---",
            f'title: "{safe_title}"',
            f'system_id: "{item.system_id}"',
            f'category: "{item.category}"',
            f'advisory_mode: "{item.advisory_mode}"',
            f'published_date: "{item.published_at or ""}"',
            f'updated_date: "{item.updated_at or item.published_at or ""}"',
            f'severity: "{item.severity}"',
            f'exploit_status: "{item.exploit_status}"',
            f'source_confidence: "{item.source_confidence}"',
            'target_types:',
            ' - "lab-local"',
            ' - "lab-public"',
            ' - "authorized-third-party"',
            'allow_public_validation: "yes, with ownership or explicit authorization"',
            'authorization_prerequisite: "asset ownership proof or explicit written authorization"',
            'minimal_validation: "read-only probe, controlled payload, reversible test"',
            "aliases:",
        ]
        for alias in item.aliases:
            lines.append(f' - "{alias}"')
        # Version lists are capped at 20 entries in the front matter.
        lines.append("affected_versions:")
        for version in item.affected_versions[:20]:
            lines.append(f' - "{version}"')
        lines.append("fixed_versions:")
        for version in item.fixed_versions[:20]:
            lines.append(f' - "{version}"')
        lines.append("secure_code_topics:")
        for topic in item.secure_code_topics:
            lines.append(f' - "{topic}"')
        lines.extend(
            [
                f'primary_source: "{item.official_source_url or ""}"',
                "---",
                "",
                f"# {item.title}",
                "",
                "## 事件层",
                "",
                f"- Canonical ID: `{item.canonical_id}`",
                f"- 系统: `{item.system_id}`",
                f"- 严重度: `{item.severity}`",
                f"- 来源置信度: `{item.source_confidence}`",
                f"- 官方主源: {item.official_source_url or '-'}",
                f"- 影响版本: `{', '.join(item.affected_versions[:10]) or 'unknown'}`",
                f"- 修复版本: `{', '.join(item.fixed_versions[:10]) or 'unknown'}`",
                "",
                "## 其他来源",
                "",
            ]
        )
        if item.secondary_source_urls:
            for ref in item.secondary_source_urls[:20]:
                lines.append(f"- {ref}")
        else:
            lines.append("- 无额外来源")
        lines.extend(
            [
                "",
                "## 实验层",
                "",
                "- 仅用于自有资产、测试环境或已明确授权目标。",
                "- 允许公网可达目标,但必须满足资产归属或明确授权前提。",
                f"- 最小化验证方式: {MINIMAL_VALIDATION_GUIDANCE}",
                "- 若该案例涉及插件、模块或扩展,应同时检查供应链与升级策略。",
                f"- 禁止场景: {forbidden_text}",
                "",
                "## 修复示例",
                "",
            ]
        )
        # Link only the secure-code pages that actually exist on disk.
        for topic in item.secure_code_topics:
            for language in LANGUAGES:
                path = SECURE_CODE_ROOT / language / f"{topic}.md"
                if path.exists():
                    lines.append(f"- [{language}:{topic}]({_abs_repo_path('05-defense', 'secure-code', language, f'{topic}.md')})")
        write_text(ROOT / item.case_path, "\n".join(lines))
def render_registry(source_map: Dict[str, Any], advisories: List[AdvisoryRecord], triage: List[Dict[str, Any]]) -> None:
    """Rewrite the JSON registry from the current advisories and triage items.

    Writes one file per advisory, one per triage item and one summary per
    system listed in ``source_map["systems"]``. The three target directories
    are first passed to ``_clear_json_dir`` (presumably removing stale JSON
    files; that helper is defined outside this block).
    """
    for target_dir in (REGISTRY_ROOT / "advisories", REGISTRY_ROOT / "systems", TRIAGE_DIR):
        _clear_json_dir(target_dir)

    records_by_system: Dict[str, List[AdvisoryRecord]] = defaultdict(list)
    for record in advisories:
        write_json(REGISTRY_ROOT / "advisories" / f"{record.canonical_id}.json", record.to_dict())
        records_by_system[record.system_id].append(record)

    # Only the per-system triage count is needed for the summaries.
    triage_counts: Dict[str, int] = defaultdict(int)
    for entry in triage:
        triage_counts[entry["system_id"]] += 1
        write_json(TRIAGE_DIR / f"{entry['canonical_id']}.json", entry)

    for system in source_map["systems"]:
        system_id = system["system_id"]
        records = records_by_system.get(system_id, [])
        summary = {
            "system_id": system_id,
            "display_name": system["display_name"],
            "category": system["category"],
            "tier": system["tier"],
            "total": len(records),
            "markdown_cases": sum(1 for record in records if record.case_path),
            "triage_count": triage_counts.get(system_id, 0),
            # ISO-style strings compare chronologically; "" when there are no records.
            "latest_update": max(
                (record.updated_at or record.published_at or "" for record in records),
                default="",
            ),
            "output_dir": system["output_dir"],
            "secure_code_topics": system.get("secure_code_topics", []),
            # Newest publication first.
            "items": [
                record.canonical_id
                for record in sorted(records, key=lambda record: record.published_at or "", reverse=True)
            ],
        }
        write_json(SYSTEMS_DIR / f"{system_id}.json", summary)
def render_generated(
    source_map: Dict[str, Any],
    advisories: List[AdvisoryRecord],
    triage: List[Dict[str, Any]],
    failures: List[str],
    change_summary: Dict[str, Any] | None = None,
) -> None:
    """Write the generated reports: coverage matrix, latest-ingest summary, run summary.

    Args:
        source_map: parsed source map; its ``systems`` list drives the matrix rows.
        advisories: all advisory records of this run.
        triage: triage items (dicts carrying at least ``system_id``).
        failures: human-readable failure strings from the source adapters.
        change_summary: optional counters (``new_count``, ``updated_count``,
            ``systems_touched``); missing keys fall back to 0 / [].
    """
    ensure_dir(GENERATED_DIR)
    systems_by_id = {entry["system_id"]: entry for entry in source_map["systems"]}
    change_summary = change_summary or {}

    triage_counts: Dict[str, int] = defaultdict(int)
    for entry in triage:
        triage_counts[entry["system_id"]] += 1

    records_by_system: Dict[str, List[AdvisoryRecord]] = defaultdict(list)
    for record in advisories:
        records_by_system[record.system_id].append(record)

    # --- coverage-matrix.md: one table row per system, sorted by system id ---
    coverage_lines = [
        "# 覆盖矩阵",
        "",
        "| 系统 | 分类 | 覆盖策略 | 历史全量 | 近两年全量 | 全量 registry | 重点案例 Markdown | secure-code 关联 | 自动同步状态 | triage | 最近更新 |",
        "|------|------|----------|----------|------------|--------------|--------------------|------------------|--------------|--------|----------|",
    ]
    for system_id, system in sorted(systems_by_id.items()):
        records = records_by_system.get(system_id, [])
        markdown_count = sum(1 for record in records if record.case_path)
        sync_state = "seeded" if records else "scaffolded"
        recent = max((record.updated_at or record.published_at or "" for record in records), default="")
        cells = [
            f"{system['display_name']}",
            f"`{system['category']}`",
            f"`{system['tier']}`",
            f"`{'yes' if system['tier'] == 'history-full' else '-'}`",
            "`yes`",
            f"`{len(records)}`",
            f"`{markdown_count}`",
            f"`{len(system.get('secure_code_topics', []))}`",
            f"`{sync_state}`",
            f"`{triage_counts.get(system_id, 0)}`",
            f"`{recent}`",
        ]
        coverage_lines.append("| " + " | ".join(cells) + " |")
    write_text(GENERATED_DIR / "coverage-matrix.md", "\n".join(coverage_lines))

    # --- latest-ingest.md: headline counters plus the failure list ---
    markdown_total = sum(1 for record in advisories if record.case_path)
    latest_lines = [
        "# 最新同步摘要",
        "",
        f"- 渲染时间: `{isoformat(now_utc())}`",
        f"- 系统数量: `{len(source_map['systems'])}`",
        f"- Advisory 数量: `{len(advisories)}`",
        f"- 重点 Markdown 数量: `{markdown_total}`",
        f"- 新增记录: `{change_summary.get('new_count', 0)}`",
        f"- 更新记录: `{change_summary.get('updated_count', 0)}`",
        f"- Triage 数量: `{len(triage)}`",
        f"- 失败的 source adapter: `{len(failures)}`",
        "",
    ]
    if failures:
        latest_lines += ["## 失败列表", ""]
        latest_lines += [f"- {failure}" for failure in failures]
    write_text(GENERATED_DIR / "latest-ingest.md", "\n".join(latest_lines))

    # --- run-summary.json: the same counters in machine-readable form ---
    run_summary = {
        "generated_at": isoformat(now_utc()),
        "system_count": len(source_map["systems"]),
        "advisory_count": len(advisories),
        "markdown_count": markdown_total,
        "new_count": change_summary.get("new_count", 0),
        "updated_count": change_summary.get("updated_count", 0),
        "systems_touched": change_summary.get("systems_touched", []),
        "triage_count": len(triage),
        "failures": failures,
    }
    write_json(GENERATED_DIR / "run-summary.json", run_summary)
def render_secure_code(source_map: Dict[str, Any]) -> None:
    """Generate the secure-code library: root index, per-language index, topic pages.

    For every language in ``LANGUAGES`` and every topic in
    ``TOPIC_DESCRIPTIONS`` a topic page is written containing the language's
    vulnerable/safer snippet pair and the display names of the systems whose
    ``secure_code_topics`` include that topic. Index content is written to
    both ``README.md`` and ``INDEX.md`` at each level.
    """
    # topic -> display names of the systems that reference it
    systems_by_topic = defaultdict(set)
    for system in source_map["systems"]:
        for topic in system.get("secure_code_topics", []):
            systems_by_topic[topic].add(system["display_name"])

    root_lines = [
        "# 安全编码修复库",
        "",
        "> `LAB ONLY` | 修复主题用于把实验发现映射回代码整改,不代表默认生产基线。",
        "",
        "- 语言范围: `javascript-typescript`, `nodejs`, `java`, `php`, `python`, `ruby`, `csharp`, `go`",
        "- 主题范围: 输出编码、DOM sink、CSP / Trusted Types、令牌存储、鉴权复核、SSRF、走私边界、路径穿越、文件上传、插件信任、依赖升级、代理信任、反序列化、模板注入。",
        "",
    ]
    root_lines.extend(
        f"- [{language}]({_abs_repo_path('05-defense', 'secure-code', language, 'README.md')})"
        for language in LANGUAGES
    )
    root_text = "\n".join(root_lines)
    write_text(SECURE_CODE_ROOT / "README.md", root_text)
    write_text(SECURE_CODE_ROOT / "INDEX.md", root_text)

    for language in LANGUAGES:
        language_dir = SECURE_CODE_ROOT / language
        ensure_dir(language_dir)
        fence_tag = _code_fence(language)
        index_lines = [
            f"# {language}",
            "",
            "> 自动生成修复主题索引",
            "",
            "- 语境: 授权攻防实验后的修复映射,不作为生产默认推荐模版。",
            "",
        ]
        for topic, description in TOPIC_DESCRIPTIONS.items():
            topic_link = _abs_repo_path('05-defense', 'secure-code', language, f'{topic}.md')
            index_lines.append(f"- [{topic}]({topic_link}) - {description}")
            # The snippet pair depends on the language only, not on the topic.
            bad, good = BAD_GOOD_SNIPPETS[language]
            page_lines = [
                f"# {topic}",
                "",
                "> `LAB ONLY` | 修复主题页",
                "",
                f"- 语言: `{language}`",
                f"- 主题: `{topic}`",
                f"- 说明: {description}",
                f"- 典型场景: {TOPIC_SCENARIOS.get(topic, '把实验问题还原为可修复的代码模式。')}",
                "",
                "## 脆弱示例",
                "",
                f"```{fence_tag}",
                bad,
                "```",
                "",
                "## 更安全的写法",
                "",
                f"```{fence_tag}",
                good,
                "```",
                "",
                "## 检查清单",
                "",
                "- 明确输入边界与不可信来源",
                "- 在服务端或可信封装层统一做校验/转义/约束",
                "- 对关键路径补充自动化测试和依赖升级策略",
                "",
                "## 相关系统",
                "",
            ]
            page_lines.extend(
                f"- {display_name}" for display_name in sorted(systems_by_topic.get(topic, []))
            )
            write_text(language_dir / f"{topic}.md", "\n".join(page_lines))
        index_text = "\n".join(index_lines)
        write_text(language_dir / "INDEX.md", index_text)
        write_text(language_dir / "README.md", index_text)
def _code_fence(language: str) -> str:
mapping = {
"javascript-typescript": "ts",
"nodejs": "js",
"java": "java",
"php": "php",
"python": "py",
"ruby": "rb",
"csharp": "cs",
"go": "go",
}
return mapping.get(language, "")

91
scripts/intel/route.py 普通文件
查看文件

@@ -0,0 +1,91 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List
from intel.models import AdvisoryRecord
from intel.utils import slugify
# Secure-code topic slug -> lowercase keywords. `_pick_topics` adds a topic to
# an advisory when any of its keywords occurs in the advisory's lowercased
# title / summary / aliases text.
TOPIC_KEYWORDS = {
    "xss-output-encoding": ["xss", "cross-site scripting"],
    "dom-sink-hardening": ["dom xss", "innerhtml", "outerhtml"],
    "csp-trusted-types": ["trusted types", "content security policy", "csp"],
    "token-cookie-storage": ["cookie", "token", "session", "jwt"],
    "authz-server-side-recheck": ["authorization bypass", "auth bypass", "improper authorization", "access control"],
    "ssrf-url-validation": ["ssrf", "server-side request forgery"],
    "request-smuggling-boundary": ["request smuggling", "http desync"],
    "path-traversal-guard": ["path traversal", "directory traversal"],
    "file-upload-validation": ["file upload", "upload"],
    "plugin-extension-trust-policy": ["plugin", "extension", "module", "theme"],
    "dependency-upgrade-policy": ["dependency", "supply chain", "advisory", "package"],
    "proxy-trust-boundary": ["proxy", "middleware", "reverse proxy", "header trust"],
    "deserialization-safety": ["deserialization", "serialization"],
    "template-injection-guard": ["template injection", "ssti"],
}
# Lowercase terms `_should_render` looks for in the title / summary / exploit
# status of critical or high severity advisories when deciding whether to
# render a Markdown case.
HIGH_VALUE_TERMS = [
    "rce",
    "remote code execution",
    "authorization bypass",
    "auth bypass",
    "known_exploited",
    "known exploited",
    "ssrf",
    "deserialization",
]
def _pick_topics(system: Dict[str, Any], advisory: AdvisoryRecord) -> List[str]:
    """Return the secure-code topics for *advisory*, de-duplicated in order.

    The system's configured ``secure_code_topics`` come first, followed by
    every ``TOPIC_KEYWORDS`` topic with a keyword that occurs in the
    advisory's lowercased title, summary or aliases.
    """
    text_fields = [advisory.title, advisory.summary, " ".join(advisory.aliases)]
    haystack = " ".join(field for field in text_fields if field).lower()

    picked = list(system.get("secure_code_topics", []))
    picked.extend(
        topic
        for topic, keywords in TOPIC_KEYWORDS.items()
        if any(keyword in haystack for keyword in keywords)
    )
    # dict keys keep first-seen order, which drops repeats without reordering.
    return list(dict.fromkeys(picked))
def _should_render(system: Dict[str, Any], advisory: AdvisoryRecord) -> bool:
    """Decide whether *advisory* gets a Markdown case page.

    Rules, in order:
      1. triage-status advisories are never rendered;
      2. "core" advisories are rendered when the system's ``render_policy``
         sets ``core_always_markdown``;
      3. any exploit status other than empty/"unknown" renders;
      4. a CVSS score of 8.8 or higher renders;
      5. critical/high advisories render when a ``HIGH_VALUE_TERMS`` entry
         appears in the title, summary or exploit status.

    Terms in rule 5 are matched as whole words: a plain substring test makes
    the short term "rce" fire on "source", "resource", "enforce" and similar.
    """
    import re  # local import: this module has no file-level ``re`` import

    if advisory.status == "triage":
        return False
    # ``or {}`` also covers a policy key that is present but null.
    policy = system.get("render_policy") or {}
    if advisory.advisory_mode == "core" and policy.get("core_always_markdown", False):
        return True
    if advisory.exploit_status and advisory.exploit_status != "unknown":
        return True
    if advisory.cvss_score is not None and advisory.cvss_score >= 8.8:
        return True
    if advisory.severity not in {"critical", "high"}:
        return False
    haystack = f"{advisory.title} {advisory.summary} {advisory.exploit_status}".lower()
    return any(
        re.search(rf"(?<![a-z0-9_]){re.escape(term)}(?![a-z0-9_])", haystack)
        for term in HIGH_VALUE_TERMS
    )
def route_advisories(source_map: Dict[str, Any], advisories: List[AdvisoryRecord]) -> List[AdvisoryRecord]:
    """Annotate each advisory with topics, the render decision and a case path.

    Advisories are mutated in place and returned in input order. ``case_path``
    is only assigned for advisories that will be rendered; its file name is a
    slug of the system id plus the first CVE id, else the first GHSA id, else
    the title.

    Raises:
        KeyError: if an advisory names a system id missing from the source map.
    """
    systems_by_id = {entry["system_id"]: entry for entry in source_map["systems"]}
    routed: List[AdvisoryRecord] = []
    for advisory in advisories:
        system = systems_by_id[advisory.system_id]
        advisory.secure_code_topics = _pick_topics(system, advisory)
        advisory.render_markdown = _should_render(system, advisory)
        if advisory.render_markdown:
            if advisory.cve_ids:
                identifier = advisory.cve_ids[0]
            elif advisory.ghsa_ids:
                identifier = advisory.ghsa_ids[0]
            else:
                identifier = advisory.title
            slug_parts = [part for part in (advisory.system_id, identifier) if part]
            slug = slugify("-".join(slug_parts))
            advisory.case_path = str(Path(system["output_dir"]) / "cases" / f"{slug}.md")
        routed.append(advisory)
    return routed

11
scripts/intel/run-hourly.sh 普通文件
查看文件

@@ -0,0 +1,11 @@
#!/bin/bash
# Hourly job: run the intel "hotlane" command, appending all output to a
# timestamped log file under the generated logs directory.
set -euo pipefail

# Checkout location. Defaults to the original fixed path; export WEBSAFE_ROOT
# in the scheduler environment to run from a different checkout.
ROOT="${WEBSAFE_ROOT:-/Users/x/websafe}"
cd "$ROOT"
LOG_DIR="$ROOT/08-threat-intel/generated/logs"
mkdir -p "$LOG_DIR"
STAMP="$(date '+%Y%m%d-%H%M%S')"
# From here on stdout and stderr both go to the log file.
exec >> "$LOG_DIR/hourly-$STAMP.log" 2>&1
echo "[hourly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 "$ROOT/scripts/intel/main.py" hotlane

查看文件

@@ -0,0 +1,11 @@
#!/bin/bash
# Nightly job: run the intel "ingest" command since the last successful run,
# appending all output to a timestamped log file.
set -euo pipefail

# Checkout location. Defaults to the original fixed path; export WEBSAFE_ROOT
# in the scheduler environment to run from a different checkout.
ROOT="${WEBSAFE_ROOT:-/Users/x/websafe}"
cd "$ROOT"
LOG_DIR="$ROOT/08-threat-intel/generated/logs"
mkdir -p "$LOG_DIR"
STAMP="$(date '+%Y%m%d-%H%M%S')"
# From here on stdout and stderr both go to the log file.
exec >> "$LOG_DIR/nightly-$STAMP.log" 2>&1
echo "[nightly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 "$ROOT/scripts/intel/main.py" ingest --since last-success

查看文件

@@ -0,0 +1,11 @@
#!/bin/bash
# Weekly job: run the intel "reconcile" command, appending all output to a
# timestamped log file.
set -euo pipefail

# Checkout location. Defaults to the original fixed path; export WEBSAFE_ROOT
# in the scheduler environment to run from a different checkout.
ROOT="${WEBSAFE_ROOT:-/Users/x/websafe}"
cd "$ROOT"
LOG_DIR="$ROOT/08-threat-intel/generated/logs"
mkdir -p "$LOG_DIR"
STAMP="$(date '+%Y%m%d-%H%M%S')"
# From here on stdout and stderr both go to the log file.
exec >> "$LOG_DIR/weekly-$STAMP.log" 2>&1
echo "[weekly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 "$ROOT/scripts/intel/main.py" reconcile

查看文件

@@ -0,0 +1 @@
"""Source adapters for advisory ingestion."""

查看文件

@@ -0,0 +1,57 @@
from __future__ import annotations
from typing import Any, Dict, List
import requests
from intel.models import Candidate
from intel.utils import unique
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Download the KEV-style JSON catalog at ``source["url"]`` and keep matching entries.

    An entry matches when any keyword occurs in its lowercased vendor,
    product, vulnerability name or short description. Keywords come from
    ``source["keywords"]``, else ``system["kev_keywords"]``, else the system's
    display name. Every match is emitted as a critical, known-exploited
    Candidate.

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    response = requests.get(source["url"], headers={"User-Agent": "websafe-intel"}, timeout=30)
    response.raise_for_status()
    catalog = response.json()

    configured = source.get("keywords") or system.get("kev_keywords", []) or [system["display_name"]]
    keywords = {keyword.lower() for keyword in configured}

    candidates: List[Candidate] = []
    for entry in catalog.get("vulnerabilities", []):
        searchable = (
            entry.get("vendorProject"),
            entry.get("product"),
            entry.get("vulnerabilityName"),
            entry.get("shortDescription"),
        )
        haystack = " ".join(text for text in searchable if text).lower()
        if all(keyword not in haystack for keyword in keywords):
            continue
        cve = entry.get("cveID")
        candidate = Candidate(
            system_id=system["system_id"],
            display_name=system["display_name"],
            category=system["category"],
            advisory_mode=source.get("advisory_mode", "core"),
            source_kind=source["kind"],
            source_name=source["name"],
            source_confidence=source["confidence"],
            source_url=source["url"],
            title=entry.get("vulnerabilityName") or cve or f"KEV advisory for {system['display_name']}",
            published_at=entry.get("dateAdded"),
            # NOTE(review): "dueDate" reads like a remediation deadline rather
            # than a modification time; confirm it is the intended value here.
            updated_at=entry.get("dueDate"),
            summary=entry.get("shortDescription") or "",
            severity="critical",
            exploit_status="known_exploited",
            aliases=unique([cve]),
            cve_ids=[cve] if cve else [],
            references=[source["url"]],
            raw=entry,
        )
        candidates.append(candidate)
    return candidates

查看文件

@@ -0,0 +1,120 @@
from __future__ import annotations
import os
from typing import Any, Dict, List
import requests
from intel.models import Candidate
from intel.utils import unique
# GitHub REST endpoint queried by fetch(); also the fallback source_url when an
# advisory carries no html_url.
API_URL = "https://api.github.com/advisories"
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Collect GitHub global security advisories relevant to *system*.

    An advisory is kept when one of its vulnerabilities names a package from
    ``system["package_names"]`` (same name and ecosystem, case-insensitive),
    or when any ``system["ghsa_keywords"]`` entry occurs in its lowercased
    summary, description, GHSA id or CVE id.

    Args:
        system: source-map system entry.
        source: source entry. ``ecosystem`` narrows the API query; the
            optional ``max_pages`` caps how many result pages are fetched
            (no cap when absent).

    Returns:
        One Candidate per matching advisory.

    Raises:
        requests.HTTPError: on a non-2xx response; rate-limit responses get a
            message that points at GITHUB_TOKEN.
    """
    headers = {"Accept": "application/vnd.github+json", "User-Agent": "websafe-intel"}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    # package name -> ecosystem, both lowercased for case-insensitive matching
    packages = {
        item["name"].lower(): item["ecosystem"].lower()
        for item in system.get("package_names", [])
        if item.get("name") and item.get("ecosystem")
    }
    keyword_set = {value.lower() for value in system.get("ghsa_keywords", [])}
    if not packages and not keyword_set:
        # Nothing could ever match, so do not download the advisory database.
        return []

    max_pages = source.get("max_pages")
    candidates: List[Candidate] = []
    # The endpoint paginates with cursors delivered in the Link header; a
    # numeric "page" parameter is not part of its API, so follow rel="next".
    url: Any = API_URL
    params: Any = {"per_page": 100, "ecosystem": source.get("ecosystem")}
    pages_fetched = 0
    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code in (403, 429) and "rate limit" in response.text.lower():
            raise requests.HTTPError(
                "GitHub advisory rate limit exceeded; set GITHUB_TOKEN for higher quota",
                response=response,
            )
        response.raise_for_status()
        advisories = response.json()
        if not advisories:
            break

        for advisory in advisories:
            matched_vulns = []
            for vuln in advisory.get("vulnerabilities") or []:
                package = vuln.get("package") or {}
                package_name = (package.get("name") or "").lower()
                ecosystem = (package.get("ecosystem") or "").lower()
                if package_name in packages and packages[package_name] == ecosystem:
                    matched_vulns.append(vuln)

            if not matched_vulns:
                # Fall back to keyword matching; an empty keyword set never matches.
                haystack = " ".join(
                    filter(
                        None,
                        [
                            advisory.get("summary"),
                            advisory.get("description"),
                            advisory.get("ghsa_id"),
                            advisory.get("cve_id"),
                        ],
                    )
                ).lower()
                if not any(keyword in haystack for keyword in keyword_set):
                    continue

            affected_versions = []
            fixed_versions = []
            package_name = None
            for vuln in matched_vulns:
                if vuln.get("vulnerable_version_range"):
                    affected_versions.append(vuln["vulnerable_version_range"])
                patched = vuln.get("first_patched_version") or {}
                if patched.get("identifier"):
                    fixed_versions.append(patched["identifier"])
                if not package_name and vuln.get("package"):
                    package_name = vuln["package"].get("name")

            # "identifiers" entries are {"type": ..., "value": ...} objects;
            # dicts are unhashable and would make unique() raise TypeError,
            # so reduce them to their string value first.
            identifier_values = [
                entry.get("value") if isinstance(entry, dict) else entry
                for entry in advisory.get("identifiers") or []
            ]
            aliases = unique([advisory.get("ghsa_id"), advisory.get("cve_id"), *identifier_values])
            cve_ids = [advisory["cve_id"]] if advisory.get("cve_id") else []
            ghsa_ids = [advisory["ghsa_id"]] if advisory.get("ghsa_id") else []
            candidates.append(
                Candidate(
                    system_id=system["system_id"],
                    display_name=system["display_name"],
                    category=system["category"],
                    advisory_mode=source.get("advisory_mode", "core"),
                    source_kind=source["kind"],
                    source_name=source["name"],
                    source_confidence=source["confidence"],
                    source_url=advisory.get("html_url") or API_URL,
                    title=advisory.get("summary") or advisory.get("ghsa_id") or "GitHub advisory",
                    published_at=advisory.get("published_at"),
                    updated_at=advisory.get("updated_at"),
                    summary=advisory.get("description") or "",
                    severity=(advisory.get("severity") or "unknown").lower(),
                    aliases=aliases,
                    cve_ids=cve_ids,
                    ghsa_ids=ghsa_ids,
                    affected_versions=unique(affected_versions),
                    fixed_versions=unique(fixed_versions),
                    package_name=package_name,
                    references=[advisory.get("html_url")] if advisory.get("html_url") else [],
                    raw=advisory,
                )
            )

        pages_fetched += 1
        if max_pages is not None and pages_fetched >= max_pages:
            break
        # The "next" URL already embeds per_page, ecosystem and the cursor.
        url = response.links.get("next", {}).get("url")
        params = None
    return candidates

查看文件

@@ -0,0 +1,56 @@
from __future__ import annotations
import re
from html import unescape
from typing import Any, Dict, List
from urllib.parse import urljoin
import requests
from intel.models import Candidate
from intel.utils import unique
# Captures (href, inner HTML) of <a ...>...</a> elements; case-insensitive and
# allowed to span lines.
ANCHOR_RE = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
# Matches any single HTML tag; used to strip markup out of link text.
TAG_RE = re.compile(r"<[^>]+>")
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Scrape anchor links from the page at ``source["url"]`` into Candidates.

    Links with empty text are ignored. When ``source["keywords"]`` is
    non-empty, a link must contain a keyword in its lowercased text or
    absolute URL. Each absolute URL is emitted once, and collection stops
    after ``source["max_items"]`` candidates (default 50).

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    response = requests.get(source["url"], headers={"User-Agent": "websafe-intel"}, timeout=30)
    response.raise_for_status()

    keywords = {kw.lower() for kw in source.get("keywords", [])}
    limit = source.get("max_items", 50)
    seen_urls = set()
    candidates: List[Candidate] = []
    for href, inner_html in ANCHOR_RE.findall(response.text):
        # Drop nested tags, then decode entities, to get plain link text.
        title = unescape(TAG_RE.sub(" ", inner_html)).strip()
        if not title:
            continue
        absolute = urljoin(source["url"], href)
        if keywords:
            haystack = f"{title} {absolute}".lower()
            if all(keyword not in haystack for keyword in keywords):
                continue
        if absolute in seen_urls:
            continue
        seen_urls.add(absolute)
        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=absolute,
                title=title,
                summary="",
                severity="unknown",
                references=unique([absolute]),
                raw={"href": absolute, "title": title},
            )
        )
        if len(candidates) >= limit:
            break
    return candidates

查看文件

@@ -0,0 +1,68 @@
from __future__ import annotations
import os
from typing import Any, Dict, List
import requests
from intel.models import Candidate
from intel.utils import unique
# NVD CVE search endpoint queried by fetch(); also the fallback source_url for
# CVEs that list no references.
API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Run one NVD keyword search and convert every returned CVE to a Candidate.

    The keyword is ``source["keyword"]`` or the system display name; the page
    size is ``source["results_per_page"]`` (default 50). Only that single page
    is requested. ``NVD_API_KEY`` from the environment is sent as the
    ``apiKey`` header when set.

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    headers = {"User-Agent": "websafe-intel"}
    api_key = os.environ.get("NVD_API_KEY")
    if api_key:
        headers["apiKey"] = api_key
    query = {
        "keywordSearch": source.get("keyword") or system["display_name"],
        "resultsPerPage": source.get("results_per_page", 50),
    }
    response = requests.get(API_URL, headers=headers, params=query, timeout=30)
    response.raise_for_status()

    candidates: List[Candidate] = []
    for item in response.json().get("vulnerabilities", []):
        cve = item.get("cve", {})
        cve_id = cve.get("id")
        description = next(
            (entry.get("value") for entry in cve.get("descriptions", []) if entry.get("lang") == "en"),
            "",
        )

        # Use the first populated metric family, newest CVSS version first.
        metrics = cve.get("metrics", {})
        metric_entries = next(
            (metrics[key] for key in ("cvssMetricV31", "cvssMetricV30", "cvssMetricV2") if metrics.get(key)),
            None,
        )
        if metric_entries:
            primary = metric_entries[0]
            cvss_data = primary.get("cvssData", {})
            # The severity label is looked up on the entry first, then in cvssData.
            severity = (primary.get("baseSeverity") or cvss_data.get("baseSeverity") or "unknown").lower()
            cvss_score = cvss_data.get("baseScore")
        else:
            severity = "unknown"
            cvss_score = None

        refs = [ref.get("url") for ref in cve.get("references", []) if ref.get("url")]
        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=refs[0] if refs else API_URL,
                title=cve_id or f"NVD advisory for {system['display_name']}",
                published_at=cve.get("published"),
                updated_at=cve.get("lastModified"),
                summary=description or "",
                severity=severity,
                cvss_score=cvss_score,
                aliases=unique([cve_id]),
                cve_ids=[cve_id] if cve_id else [],
                references=refs,
                raw=item,
            )
        )
    return candidates

查看文件

@@ -0,0 +1,154 @@
from __future__ import annotations
import re
from typing import Any, Dict, List
import requests
from intel.models import Candidate
from intel.utils import unique
# OSV batch endpoint: fetch() POSTs one query per configured package.
QUERY_BATCH_URL = "https://api.osv.dev/v1/querybatch"
# OSV per-vulnerability endpoint; formatted with the vulnerability id.
DETAIL_URL = "https://api.osv.dev/v1/vulns/{vuln_id}"
# Pattern for a CVSS 3.0/3.1 base vector preceded by a slash.
CVSS_SCORE_RE = re.compile(r"/CVSS:3\.[01]/AV:[A-Z]/AC:[A-Z]/PR:[A-Z]/UI:[A-Z]/S:[A-Z]/C:[A-Z]/I:[A-Z]/A:[A-Z]")
# Pattern capturing the first integer or decimal number in a string.
NUMERIC_SCORE_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)")
def _fetch_detail(session: requests.Session, vuln_id: str) -> Dict[str, Any]:
    """GET the full OSV record for *vuln_id* and return the decoded JSON.

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    detail_url = DETAIL_URL.format(vuln_id=vuln_id)
    reply = session.get(detail_url, headers={"User-Agent": "websafe-intel"}, timeout=30)
    reply.raise_for_status()
    return reply.json()
def _fixed_versions(vuln: Dict[str, Any]) -> List[str]:
    """Return every distinct ``fixed`` version found in the record's range events."""
    fixes = [
        event["fixed"]
        for affected in vuln.get("affected", [])
        for version_range in affected.get("ranges", [])
        for event in version_range.get("events", [])
        if event.get("fixed")
    ]
    return unique(fixes)
def _affected_versions(vuln: Dict[str, Any]) -> List[str]:
    """Describe which versions an OSV record affects.

    Returns the explicit version strings (at most 20 per ``affected`` entry)
    followed by one summary string per range, e.g.
    ``"introduced=1.0, fixed<1.2"``, with duplicates removed. Within a range
    only the first non-empty value of each event kind is used.
    """
    # (event key, prefix used in the summary), in output order
    bound_labels = (
        ("introduced", "introduced="),
        ("last_affected", "last_affected="),
        ("fixed", "fixed<"),
        ("limit", "limit<"),
    )
    explicit: List[str] = []
    summaries: List[str] = []
    for affected in vuln.get("affected", []):
        explicit.extend(affected.get("versions", [])[:20])
        for version_range in affected.get("ranges", []):
            bounds = dict.fromkeys(key for key, _ in bound_labels)
            for event in version_range.get("events", []):
                for key in bounds:
                    bounds[key] = bounds[key] or event.get(key)
            parts = [f"{prefix}{bounds[key]}" for key, prefix in bound_labels if bounds[key]]
            if parts:
                summaries.append(", ".join(parts))
    return unique(explicit + summaries)
def _severity(vuln: Dict[str, Any]) -> tuple[str, float | None]:
best_score = None
for sev in vuln.get("severity", []):
score = sev.get("score", "")
match = NUMERIC_SCORE_RE.search(score)
if match:
try:
best_score = float(match.group(1))
break
except ValueError:
continue
if best_score is None:
return "unknown", None
if best_score >= 9.0:
return "critical", best_score
if best_score >= 7.0:
return "high", best_score
if best_score >= 4.0:
return "medium", best_score
return "low", best_score
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Query OSV for every package of *system* and return one Candidate per hit.

    A single batch request asks about all ``system["package_names"]``; the
    full record of each returned vulnerability id is then fetched once and
    reused if several packages report the same id. Returns [] without any
    network traffic when the system lists no packages.

    Raises:
        requests.HTTPError: on a non-2xx response from either endpoint.
    """
    packages = system.get("package_names", [])
    if not packages:
        return []
    queries = [{"package": {"name": pkg["name"], "ecosystem": pkg["ecosystem"]}} for pkg in packages]
    detail_cache: Dict[str, Dict[str, Any]] = {}
    candidates: List[Candidate] = []
    # The context manager closes the session's pooled connections even when a
    # request raises part-way through.
    with requests.Session() as session:
        response = session.post(
            QUERY_BATCH_URL,
            json={"queries": queries},
            headers={"User-Agent": "websafe-intel"},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
        # NOTE(review): only the first page of each batch result is read here;
        # confirm whether result pagination needs handling for large packages.
        # Results are paired with packages by position.
        for package, result in zip(packages, payload.get("results", [])):
            for summary in result.get("vulns", []):
                vuln_id = summary.get("id")
                if not vuln_id:
                    continue
                if vuln_id not in detail_cache:
                    detail_cache[vuln_id] = _fetch_detail(session, vuln_id)
                vuln = detail_cache[vuln_id]
                # ``or []`` tolerates an explicit null aliases field.
                aliases = unique((vuln.get("aliases") or []) + [vuln.get("id")])
                refs = [ref.get("url") for ref in vuln.get("references", []) if ref.get("url")]
                severity, cvss_score = _severity(vuln)
                package_name = package["name"]
                if not package_name:
                    # Fall back to the first package name recorded on the vulnerability.
                    for affected in vuln.get("affected", []):
                        pkg = affected.get("package") or {}
                        if pkg.get("name"):
                            package_name = pkg["name"]
                            break
                candidates.append(
                    Candidate(
                        system_id=system["system_id"],
                        display_name=system["display_name"],
                        category=system["category"],
                        advisory_mode=source.get("advisory_mode", "core"),
                        source_kind=source["kind"],
                        source_name=source["name"],
                        source_confidence=source["confidence"],
                        source_url=refs[0] if refs else DETAIL_URL.format(vuln_id=vuln_id),
                        title=vuln.get("summary") or vuln.get("id") or f"OSV advisory for {package['name']}",
                        published_at=vuln.get("published"),
                        updated_at=vuln.get("modified"),
                        summary=vuln.get("details") or "",
                        severity=severity,
                        cvss_score=cvss_score,
                        aliases=aliases,
                        cve_ids=[item for item in aliases if item and item.startswith("CVE-")],
                        ghsa_ids=[item for item in aliases if item and item.startswith("GHSA-")],
                        osv_ids=[vuln.get("id")] if vuln.get("id") else [],
                        affected_versions=_affected_versions(vuln),
                        fixed_versions=_fixed_versions(vuln),
                        package_name=package_name,
                        references=refs,
                        raw=vuln,
                    )
                )
    return candidates

查看文件

@@ -0,0 +1,51 @@
from __future__ import annotations
import xml.etree.ElementTree as ET
from typing import Any, Dict, List
import requests
from intel.models import Candidate
def _text(node: ET.Element, name: str) -> str:
child = node.find(name)
return child.text.strip() if child is not None and child.text else ""
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Turn ``<item>`` elements of the RSS feed at ``source["url"]`` into Candidates.

    Only the first ``source["max_items"]`` items (default 50) are examined;
    the keyword filter is applied after that cut. With keywords configured, an
    item must contain one in its lowercased title or description.

    Raises:
        requests.HTTPError: on a non-2xx response.
        xml.etree.ElementTree.ParseError: if the body is not well-formed XML.
    """
    response = requests.get(source["url"], headers={"User-Agent": "websafe-intel"}, timeout=30)
    response.raise_for_status()
    # NOTE(review): the feed is remote content parsed with the stdlib XML
    # parser; the Python docs list XML attacks it does not guard against.
    feed = ET.fromstring(response.content)
    keywords = {kw.lower() for kw in source.get("keywords", [])}

    candidates: List[Candidate] = []
    for entry in feed.findall(".//item")[: source.get("max_items", 50)]:
        title = _text(entry, "title")
        link = _text(entry, "link") or source["url"]
        description = _text(entry, "description")
        if keywords:
            haystack = f"{title} {description}".lower()
            if all(keyword not in haystack for keyword in keywords):
                continue
        published = _text(entry, "pubDate")
        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=link,
                title=title or f"RSS entry for {system['display_name']}",
                # The feed's pubDate serves as both timestamps.
                published_at=published,
                updated_at=published,
                summary=description,
                severity="unknown",
                references=[link],
                raw={"title": title, "link": link},
            )
        )
    return candidates

查看文件

@@ -0,0 +1,57 @@
from __future__ import annotations
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from intel.models import Candidate
from intel.utils import parse_dt
from . import cisa_kev, github_global, html_links, nvd_api, osv_api, rss_feed
# Source ``kind`` -> adapter ``fetch(system, source)`` function. A kind absent
# from this table is reported by collect_candidates as an unsupported source.
HANDLERS = {
    "ghsa-global": github_global.fetch,
    "osv-batch": osv_api.fetch,
    "kev-json": cisa_kev.fetch,
    "nvd-search": nvd_api.fetch,
    "rss-feed": rss_feed.fetch,
    "html-links": html_links.fetch,
}
def _passes_since(candidate: Candidate, since_dt: Optional[datetime], include_undated: bool) -> bool:
    """Tell whether *candidate* falls inside the ``since_dt`` window.

    With no cutoff everything passes. Otherwise the later of the parseable
    updated/published timestamps must be at or after the cutoff; a candidate
    with no parseable timestamp passes only when *include_undated* is true.
    """
    if since_dt is None:
        return True
    parsed = (parse_dt(candidate.updated_at), parse_dt(candidate.published_at))
    known = [stamp for stamp in parsed if stamp is not None]
    return max(known) >= since_dt if known else include_undated
def collect_candidates(
    source_map: Dict[str, Any],
    since_dt: Optional[datetime] = None,
    tier: Optional[str] = None,
    include_undated: bool = False,
) -> Tuple[List[Candidate], List[str]]:
    """Run every configured source adapter and gather the candidates.

    Args:
        source_map: parsed source map with a ``systems`` list.
        since_dt: when given, only candidates accepted by ``_passes_since`` are kept.
        tier: when given, only systems whose ``tier`` equals it are processed.
        include_undated: whether candidates without a parseable timestamp
            survive the ``since_dt`` filter.

    Returns:
        ``(candidates, failures)``. A failing adapter never aborts the run: it
        adds one string ``"<system_id>::<source name>::<ExceptionClass>: <message>"``
        to ``failures`` and the loop moves on to the next source.
    """
    all_candidates: List[Candidate] = []
    failures: List[str] = []
    for system in source_map["systems"]:
        if tier and system.get("tier") != tier:
            continue
        for bucket_name in ("official_sources", "ecosystem_sources", "research_sources"):
            for source in system.get(bucket_name, []):
                # .get() keeps a malformed source entry (missing "kind" or
                # "name") from raising outside the try block below.
                kind = source.get("kind")
                handler = HANDLERS.get(kind)
                if handler is None:
                    failures.append(f"Unsupported source kind {kind} for {system['system_id']}")
                    continue
                try:
                    items = handler(system, source)
                    all_candidates.extend(
                        item for item in items if _passes_since(item, since_dt, include_undated)
                    )
                except Exception as exc:  # boundary: one bad source must not stop the others
                    label = source.get("name", kind)
                    # Keep the message (collapsed to one line): it carries the
                    # actionable detail, e.g. an adapter's rate-limit hint.
                    detail = " ".join(str(exc).split())
                    failure = f"{system['system_id']}::{label}::{exc.__class__.__name__}"
                    if detail:
                        failure = f"{failure}: {detail}"
                    failures.append(failure)
    return all_candidates, failures

150
scripts/intel/utils.py 普通文件
查看文件

@@ -0,0 +1,150 @@
from __future__ import annotations
import hashlib
import json
import re
import subprocess
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional
# Shared UTC tzinfo used by the date helpers in this module.
UTC = timezone.utc
def now_utc() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(UTC)
def isoformat(dt: datetime) -> str:
    """Render *dt* as an ISO 8601 string in UTC with microseconds dropped."""
    in_utc = dt.astimezone(UTC)
    whole_seconds = in_utc.replace(microsecond=0)
    return whole_seconds.isoformat()
def parse_dt(value: Optional[str]) -> Optional[datetime]:
    """Parse a timestamp string into a timezone-aware datetime.

    Tried in order: ISO 8601 (a trailing "Z" is read as +00:00), the fixed
    formats ``"%a, %d %b %Y %H:%M:%S %z"``, ``"%Y-%m-%d"`` and
    ``"%Y-%m-%d %H:%M:%S"``, and finally the RFC 2822 parser from
    ``email.utils``. The last step covers feed dates that spell the zone as a
    name, such as "Mon, 01 Jan 2024 00:00:00 GMT", which ``%z`` rejects.

    Returns:
        The parsed datetime, with UTC assumed when the input has no offset;
        None for empty, non-string or unparseable input.
    """
    from email.utils import parsedate_to_datetime  # local: only the fallback needs it

    if not value or not isinstance(value, str):
        return None
    text = value.strip()
    if text.endswith("Z"):
        text = text[:-1] + "+00:00"

    parsed = None
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        for fmt in ("%a, %d %b %Y %H:%M:%S %z", "%Y-%m-%d", "%Y-%m-%d %H:%M:%S"):
            try:
                parsed = datetime.strptime(text, fmt)
                break
            except ValueError:
                continue
    if parsed is None:
        try:
            parsed = parsedate_to_datetime(text)
        except (TypeError, ValueError, IndexError):
            # Older Pythons raise TypeError for unparseable input, newer ones ValueError.
            return None
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed
def parse_since(value: str, default_days: int = 30) -> datetime:
    """Turn a ``--since`` argument into a UTC cutoff datetime.

    Accepts an empty value (meaning *default_days* ago), a relative ``"<N>d"``
    form (N days ago), or any timestamp ``parse_dt`` understands.

    Raises:
        ValueError: if the value matches none of those forms.
    """
    text = (value or "").strip()
    if not text:
        return now_utc() - timedelta(days=default_days)

    relative = re.fullmatch(r"(\d+)d", text)
    if relative is not None:
        return now_utc() - timedelta(days=int(relative.group(1)))

    absolute = parse_dt(text)
    if not absolute:
        raise ValueError(f"Unsupported --since value: {text}")
    if absolute.tzinfo is None:
        return absolute.replace(tzinfo=UTC)
    return absolute.astimezone(UTC)
def slugify(value: str) -> str:
    """Lowercase *value* and join its ``[a-z0-9]`` runs with single hyphens.

    Returns "item" when nothing alphanumeric remains.
    """
    tokens = [token for token in re.split(r"[^a-z0-9]+", value.lower()) if token]
    return "-".join(tokens) or "item"
def short_hash(*parts: str) -> str:
    """Return the first 10 hex digits of the SHA-1 of *parts* joined with "::"."""
    hasher = hashlib.sha1()
    hasher.update("::".join(parts).encode("utf-8"))
    return hasher.hexdigest()[:10]
def ensure_dir(path: Path) -> None:
    """Create directory *path* and any missing parents; existing ones are fine."""
    path.mkdir(exist_ok=True, parents=True)
def read_json(path: Path, default: Any = None) -> Any:
    """Load UTF-8 JSON from *path*, or return *default* if the path does not exist."""
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return default
def write_json(path: Path, data: Any) -> None:
    """Write *data* to *path* as indented, ASCII-escaped JSON ending in a newline.

    The parent directory is created when missing.
    """
    ensure_dir(path.parent)
    with path.open("w", encoding="utf-8") as handle:
        serialized = json.dumps(data, indent=2, ensure_ascii=True, sort_keys=False)
        handle.write(serialized + "\n")
def write_text(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8 with exactly one trailing newline.

    Trailing whitespace of *content* is stripped first; the parent directory
    is created when missing.
    """
    ensure_dir(path.parent)
    normalized = content.rstrip() + "\n"
    path.write_text(normalized, encoding="utf-8")
def run(cmd: List[str], cwd: Optional[Path] = None, check: bool = True) -> subprocess.CompletedProcess:
    """Run *cmd* without a shell, capturing stdout and stderr as text.

    Args:
        cmd: argument vector to execute.
        cwd: working directory, or None to inherit the current one.
        check: when true, a non-zero exit raises ``subprocess.CalledProcessError``.
    """
    working_dir = None if not cwd else str(cwd)
    return subprocess.run(cmd, cwd=working_dir, check=check, text=True, capture_output=True)
def load_all_json(path: Path) -> List[Dict[str, Any]]:
    """Load every ``*.json`` file directly under *path*, in sorted name order.

    Only files whose top-level value is a JSON object are returned; a missing
    directory yields an empty list.
    """
    if not path.exists():
        return []
    loaded = (read_json(file_path, default=None) for file_path in sorted(path.glob("*.json")))
    return [content for content in loaded if isinstance(content, dict)]
def unique(values: Iterable[str]) -> List[str]:
    """Return the truthy items of *values* with duplicates dropped, first-seen order kept."""
    # dict keys preserve insertion order, which gives order-preserving de-duplication.
    return list(dict.fromkeys(value for value in values if value))
def severity_rank(value: Optional[str]) -> int:
    """Map a severity label to a sortable rank, case-insensitively.

    critical=5, high/important=4, medium/moderate=3, low=2, info=1; anything
    else, including None and "unknown", ranks 0.
    """
    label = (value or "").lower()
    ranks = {
        "critical": 5,
        "high": 4,
        "important": 4,
        "medium": 3,
        "moderate": 3,
        "low": 2,
        "info": 1,
    }
    return ranks.get(label, 0)
def best_severity(values: Iterable[Optional[str]]) -> str:
    """Return the highest-ranked non-empty label in *values*, or "unknown".

    Among labels of equal rank the one appearing first wins.
    """
    labels = [value for value in values if value]
    if not labels:
        return "unknown"
    # max() keeps the first of several equally ranked labels.
    return max(labels, key=severity_rank)

97
scripts/intel/validators.py 普通文件
查看文件

@@ -0,0 +1,97 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List
from intel.config import FRAMEWORK_ROOT, GENERATED_DIR, REGISTRY_ROOT, ROOT, SECURE_CODE_ROOT, SOURCE_MAP_PATH, SYSTEMS_DIR
from intel.render import LANGUAGES, TOPIC_DESCRIPTIONS
from intel.utils import load_all_json
# Keys validate() expects in every advisory JSON file of the registry.
REQUIRED_REGISTRY_FIELDS = {
    "canonical_id",
    "system_id",
    "title",
    "severity",
    "source_confidence",
    "status",
}
# Keys validate() expects on every system entry of the source map.
REQUIRED_SYSTEM_FIELDS = {
    "system_id",
    "display_name",
    "category",
    "tier",
    "advisory_modes",
    "official_sources",
    "ecosystem_sources",
    "research_sources",
    "output_dir",
    "secure_code_topics",
    "render_policy",
}
def validate(source_map: Dict[str, Any]) -> List[str]:
    """Check the source map and the repository files it implies.

    Each failed check is reported as a message instead of raising, so the
    caller receives every problem in a single pass.

    Args:
        source_map: Parsed source-map document. Its ``"systems"`` entry is
            the list of system definitions to check; a missing or empty
            entry is treated as no systems.

    Returns:
        Error messages in the order the checks ran. An empty list means
        every check passed.
    """
    errors: List[str] = []

    if not SOURCE_MAP_PATH.exists():
        errors.append("source-map.yaml is missing")

    ids = set()
    groups = set()
    for system in source_map.get("systems") or []:
        # A malformed entry is reported rather than crashing the validator.
        if not isinstance(system, dict):
            errors.append(f"system entry is not a mapping: {system!r}")
            continue

        missing = REQUIRED_SYSTEM_FIELDS - set(system.keys())
        if missing:
            errors.append(f"system missing required fields: {system.get('system_id', 'unknown')} -> {sorted(missing)}")

        # The remaining per-system checks index "system_id" and "output_dir"
        # directly. Their absence is already reported above, so skip the
        # dependent checks instead of raising KeyError.
        if "system_id" not in system:
            continue
        system_id = system["system_id"]
        if system_id in ids:
            errors.append(f"duplicate system_id: {system_id}")
        ids.add(system_id)

        if "output_dir" not in system:
            continue
        output_dir = Path(system["output_dir"])
        if len(output_dir.parts) < 3:
            errors.append(f"invalid output_dir for {system_id}: {system['output_dir']}")
            continue
        # The second path component names the group the system belongs to.
        groups.add(output_dir.parts[1])

        system_root = ROOT / output_dir
        if not (system_root / "README.md").exists():
            errors.append(f"system README missing: {system_root / 'README.md'}")
        if not (system_root / "INDEX.md").exists():
            errors.append(f"system INDEX missing: {system_root / 'INDEX.md'}")
        if not (SYSTEMS_DIR / f"{system_id}.json").exists():
            errors.append(f"system registry summary missing: {SYSTEMS_DIR / f'{system_id}.json'}")

    if not (FRAMEWORK_ROOT / "README.md").exists():
        errors.append(f"framework root README missing: {FRAMEWORK_ROOT / 'README.md'}")
    # Sorted so the message order is stable across runs (set iteration order
    # for strings is not).
    for group in sorted(groups):
        if not (FRAMEWORK_ROOT / group / "README.md").exists():
            errors.append(f"group README missing: {FRAMEWORK_ROOT / group / 'README.md'}")

    for item in load_all_json(REGISTRY_ROOT / "advisories"):
        missing = REQUIRED_REGISTRY_FIELDS - set(item.keys())
        if missing:
            errors.append(f"registry advisory missing fields: {item.get('canonical_id', 'unknown')} -> {sorted(missing)}")

    for path in [
        GENERATED_DIR / "coverage-matrix.md",
        GENERATED_DIR / "latest-ingest.md",
        GENERATED_DIR / "run-summary.json",
        REGISTRY_ROOT / "source-confidence.md",
    ]:
        if not path.exists():
            errors.append(f"generated artifact missing: {path}")

    if not (SECURE_CODE_ROOT / "README.md").exists():
        errors.append(f"secure-code README missing: {SECURE_CODE_ROOT / 'README.md'}")
    for language in LANGUAGES:
        language_dir = SECURE_CODE_ROOT / language
        if not (language_dir / "README.md").exists():
            errors.append(f"language README missing: {language_dir / 'README.md'}")
        if not (language_dir / "INDEX.md").exists():
            errors.append(f"language INDEX missing: {language_dir / 'INDEX.md'}")
        # Every language directory is expected to hold one page per topic.
        for topic in TOPIC_DESCRIPTIONS:
            if not (language_dir / f"{topic}.md").exists():
                errors.append(f"secure-code topic missing: {language_dir / f'{topic}.md'}")

    return errors