kb: expand authorized lab coverage and intel automation
这个提交包含在:
1
scripts/intel/__init__.py
普通文件
1
scripts/intel/__init__.py
普通文件
@@ -0,0 +1 @@
|
||||
"""Websafe threat intelligence automation package."""
|
||||
45
scripts/intel/config.py
普通文件
45
scripts/intel/config.py
普通文件
@@ -0,0 +1,45 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
FRAMEWORK_ROOT = ROOT / "07-framework-security"
|
||||
THREAT_INTEL_ROOT = ROOT / "08-threat-intel"
|
||||
REGISTRY_ROOT = THREAT_INTEL_ROOT / "registry"
|
||||
ADVISORIES_DIR = REGISTRY_ROOT / "advisories"
|
||||
SYSTEMS_DIR = REGISTRY_ROOT / "systems"
|
||||
TRIAGE_DIR = REGISTRY_ROOT / "triage"
|
||||
GENERATED_DIR = THREAT_INTEL_ROOT / "generated"
|
||||
SECURE_CODE_ROOT = ROOT / "05-defense" / "secure-code"
|
||||
SOURCE_MAP_PATH = THREAT_INTEL_ROOT / "source-map.yaml"
|
||||
STATE_DIR = Path.home() / ".local" / "state" / "websafe-intel"
|
||||
STATE_PATH = STATE_DIR / "state.json"
|
||||
|
||||
|
||||
def load_source_map() -> Dict[str, Any]:
    """Read ``source-map.yaml`` and check its top-level shape.

    Returns:
        The parsed document, which is a mapping with a ``systems`` list.

    Raises:
        ValueError: If the document is not a mapping, has no ``systems`` key,
            or ``systems`` is not a list.
    """
    with SOURCE_MAP_PATH.open("r", encoding="utf-8") as stream:
        document = yaml.safe_load(stream)

    has_systems = isinstance(document, dict) and "systems" in document
    if not has_systems:
        raise ValueError("source-map.yaml must contain a top-level 'systems' list")
    if not isinstance(document["systems"], list):
        raise ValueError("'systems' must be a list")
    return document
|
||||
|
||||
|
||||
def get_systems_by_group(source_map: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
    """Bucket system entries by the second path component of ``output_dir``.

    Args:
        source_map: Mapping with a ``systems`` list; every entry needs
            ``output_dir`` (and ``system_id`` for the error message).

    Returns:
        Group name -> systems in that group, in source-map order.

    Raises:
        ValueError: If an ``output_dir`` has fewer than three path components.
    """
    grouped: Dict[str, List[Dict[str, Any]]] = {}
    for entry in source_map["systems"]:
        segments = Path(entry["output_dir"]).parts
        if len(segments) < 3:
            raise ValueError(f"output_dir too short for system {entry['system_id']}")
        bucket = grouped.get(segments[1])
        if bucket is None:
            bucket = grouped[segments[1]] = []
        bucket.append(entry)
    return grouped
|
||||
290
scripts/intel/main.py
普通文件
290
scripts/intel/main.py
普通文件
@@ -0,0 +1,290 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
CURRENT_DIR = Path(__file__).resolve().parent
|
||||
SCRIPTS_DIR = CURRENT_DIR.parent
|
||||
if str(SCRIPTS_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(SCRIPTS_DIR))
|
||||
|
||||
from intel.config import ADVISORIES_DIR, GENERATED_DIR, STATE_DIR, STATE_PATH, TRIAGE_DIR, load_source_map # noqa: E402
|
||||
from intel.models import AdvisoryRecord # noqa: E402
|
||||
from intel.normalize import normalize_candidates # noqa: E402
|
||||
from intel.pr import open_pr # noqa: E402
|
||||
from intel.render import render_case_pages, render_generated, render_registry, render_secure_code, render_system_scaffolding # noqa: E402
|
||||
from intel.route import route_advisories # noqa: E402
|
||||
from intel.sources.runner import collect_candidates # noqa: E402
|
||||
from intel.utils import isoformat, load_all_json, now_utc, parse_since, read_json, write_json # noqa: E402
|
||||
from intel.validators import validate # noqa: E402
|
||||
|
||||
|
||||
def _load_existing_advisories() -> List[AdvisoryRecord]:
    """Rebuild ``AdvisoryRecord`` objects from the JSON documents in ``ADVISORIES_DIR``.

    Documents whose keys do not match the ``AdvisoryRecord`` constructor are
    still skipped (best effort), but each skip is now reported on stderr
    instead of disappearing silently.

    Returns:
        The successfully reconstructed records, in ``load_all_json`` order.
    """
    advisories: List[AdvisoryRecord] = []
    for item in load_all_json(ADVISORIES_DIR):
        try:
            advisories.append(AdvisoryRecord(**item))
        except TypeError as exc:
            # TypeError covers missing/unexpected keys and non-mapping documents.
            label = item.get("canonical_id", "<unknown>") if isinstance(item, dict) else "<non-object>"
            print(f"warning: skipping malformed advisory record {label}: {exc}", file=sys.stderr)
    return advisories
|
||||
|
||||
|
||||
def _load_existing_triage() -> List[Dict[str, Any]]:
    """Return whatever ``load_all_json`` yields for ``TRIAGE_DIR``."""
    triage_entries = load_all_json(TRIAGE_DIR)
    return triage_entries
|
||||
|
||||
|
||||
def _filter_source_map(source_map: Dict[str, Any], system_ids: List[str] | None) -> Dict[str, Any]:
|
||||
if not system_ids:
|
||||
return source_map
|
||||
allowed = set(system_ids)
|
||||
filtered = [system for system in source_map["systems"] if system["system_id"] in allowed]
|
||||
found = {system["system_id"] for system in filtered}
|
||||
missing = sorted(allowed - found)
|
||||
if missing:
|
||||
raise ValueError(f"Unknown system_id(s): {', '.join(missing)}")
|
||||
return {**source_map, "systems": filtered}
|
||||
|
||||
|
||||
def _merge_preserved_records(
    selected_source_map: Dict[str, Any],
    advisories: List[AdvisoryRecord],
    triage: List[Dict[str, Any]],
) -> tuple[List[AdvisoryRecord], List[Dict[str, Any]]]:
    """Prepend on-disk records of systems that were NOT part of this run.

    Args:
        selected_source_map: The filtered source map used for the run.
        advisories: Advisories produced for the selected systems.
        triage: Triage entries produced for the selected systems.

    Returns:
        ``(advisories, triage)`` where each list starts with the stored
        records whose ``system_id`` is outside the selection, followed by the
        records passed in.
    """
    refreshed_ids = {system["system_id"] for system in selected_source_map["systems"]}

    merged_advisories: List[AdvisoryRecord] = []
    for record in _load_existing_advisories():
        if record.system_id not in refreshed_ids:
            merged_advisories.append(record)

    merged_triage: List[Dict[str, Any]] = []
    for entry in _load_existing_triage():
        if entry.get("system_id") not in refreshed_ids:
            merged_triage.append(entry)

    merged_advisories.extend(advisories)
    merged_triage.extend(triage)
    return merged_advisories, merged_triage
|
||||
|
||||
|
||||
def _summarize_changes(advisories: List[AdvisoryRecord]) -> Dict[str, Any]:
    """Compare advisories with their stored JSON and count what changed.

    An advisory is "new" when ``<canonical_id>.json`` cannot be read from
    ``ADVISORIES_DIR`` and "updated" when the stored document differs from
    ``advisory.to_dict()``.

    Returns:
        Dict with ``new_count``, ``updated_count`` and the sorted
        ``systems_touched`` list.
    """
    created = 0
    modified = 0
    affected_systems = set()

    for record in advisories:
        on_disk = read_json(ADVISORIES_DIR / f"{record.canonical_id}.json", default=None)
        snapshot = record.to_dict()
        if on_disk is None:
            created += 1
        elif on_disk != snapshot:
            modified += 1
        else:
            # Unchanged on disk: contributes to neither counter.
            continue
        affected_systems.add(record.system_id)

    return {
        "new_count": created,
        "updated_count": modified,
        "systems_touched": sorted(affected_systems),
    }
|
||||
|
||||
|
||||
def _select_hotlane(
|
||||
advisories: List[AdvisoryRecord],
|
||||
triage: List[Dict[str, Any]],
|
||||
) -> Tuple[List[AdvisoryRecord], List[Dict[str, Any]]]:
|
||||
filtered = []
|
||||
keep_ids = set()
|
||||
for advisory in advisories:
|
||||
if advisory.exploit_status in {"known_exploited", "active_exploitation", "in_the_wild"}:
|
||||
filtered.append(advisory)
|
||||
keep_ids.add(advisory.canonical_id)
|
||||
continue
|
||||
if advisory.cvss_score is not None and advisory.cvss_score >= 8.8:
|
||||
filtered.append(advisory)
|
||||
keep_ids.add(advisory.canonical_id)
|
||||
continue
|
||||
if advisory.severity == "critical":
|
||||
filtered.append(advisory)
|
||||
keep_ids.add(advisory.canonical_id)
|
||||
filtered_triage = [item for item in triage if item.get("canonical_id") in keep_ids]
|
||||
return filtered, filtered_triage
|
||||
|
||||
|
||||
def _write_outputs(
    source_map: Dict[str, Any],
    advisories: List[AdvisoryRecord],
    triage: List[Dict[str, Any]],
    failures: List[str],
    change_summary: Dict[str, Any],
) -> None:
    """Run every renderer, in a fixed order, over the given run results.

    Order: registry, system scaffolding, case pages, secure-code pages, then
    the generated summary (the only step that receives ``failures`` and
    ``change_summary``).
    """
    # Registry first, generated summary last; the other renderers sit between.
    render_registry(source_map, advisories, triage)
    render_system_scaffolding(source_map, advisories)
    render_case_pages(advisories)
    render_secure_code(source_map)
    render_generated(source_map, advisories, triage, failures, change_summary)
|
||||
|
||||
|
||||
def pipeline(
    full_source_map: Dict[str, Any],
    source_map: Dict[str, Any],
    since_arg: str,
    tier: str | None,
    include_undated: bool,
    hotlane_only: bool = False,
) -> tuple[list[AdvisoryRecord], list[Dict[str, Any]], list[str], Dict[str, Any]]:
    """Collect, normalise, route and render advisories for one run.

    Args:
        full_source_map: The unfiltered source map.
        source_map: The (possibly filtered) source map to collect from.
        since_arg: Lower time bound handed to ``parse_since``; ignored when
            ``tier`` is ``"history-full"``.
        tier: Collection tier forwarded to ``collect_candidates``.
        include_undated: Forwarded to ``collect_candidates``.
        hotlane_only: Keep only advisories selected by ``_select_hotlane``.

    Returns:
        ``(advisories, triage, failures, change_summary)``. When the run was
        filtered, ``advisories`` and ``triage`` also contain the preserved
        records of the systems that were not selected.
    """
    if tier == "history-full":
        since_dt = None
    else:
        since_dt = parse_since(since_arg, default_days=30)

    candidates, failures = collect_candidates(
        source_map,
        since_dt=since_dt,
        tier=tier,
        include_undated=include_undated,
    )
    advisories, triage = normalize_candidates(candidates)
    advisories = route_advisories(source_map, advisories)
    if hotlane_only:
        advisories, triage = _select_hotlane(advisories, triage)

    # Summarise before merging so only this run's advisories are counted.
    change_summary = _summarize_changes(advisories)

    # A differing system count means the run was filtered: merge in the stored
    # records of the other systems and render against the full map.
    is_partial_run = len(source_map["systems"]) != len(full_source_map["systems"])
    if is_partial_run:
        advisories, triage = _merge_preserved_records(source_map, advisories, triage)
    render_map = full_source_map if is_partial_run else source_map

    _write_outputs(render_map, advisories, triage, failures, change_summary)
    return advisories, triage, failures, change_summary
|
||||
|
||||
|
||||
def cmd_render(args) -> int:
    """Handle ``render``: re-render outputs from stored registry data.

    No sources are fetched. The failure list and change summary are taken from
    the previous ``run-summary.json`` when it can be read.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    selected_ids = {system["system_id"] for system in source_map["systems"]}

    advisories = [record for record in _load_existing_advisories() if record.system_id in selected_ids]
    triage = [entry for entry in _load_existing_triage() if entry.get("system_id") in selected_ids]

    summary = read_json(GENERATED_DIR / "run-summary.json", default={}) or {}
    failures = summary.get("failures", [])

    # Filtered run: add the stored records of the other systems and render
    # against the full map.
    is_partial_run = len(source_map["systems"]) != len(full_source_map["systems"])
    if is_partial_run:
        advisories, triage = _merge_preserved_records(source_map, advisories, triage)
    render_map = full_source_map if is_partial_run else source_map

    _write_outputs(render_map, advisories, triage, failures, summary)
    return 0
|
||||
|
||||
|
||||
def cmd_validate(args) -> int:
    """Handle ``validate``: run the validators and print their findings.

    Returns:
        ``0`` when ``validate`` reports no errors, ``1`` otherwise.
    """
    problems = validate(_filter_source_map(load_source_map(), args.system))
    if not problems:
        print("Validation passed.")
        return 0

    print("Validation failed:")
    for problem in problems:
        print(f"- {problem}")
    return 1
|
||||
|
||||
|
||||
def _write_state(status: str) -> None:
    """Persist the outcome of a run to ``STATE_PATH``.

    Args:
        status: Outcome label stored under ``status``. Only the literal
            ``"success"`` advances ``last_success``; previously every status
            did, so a non-success status would still have moved the cursor
            that ``ingest --since last-success`` reads.
    """
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    # ``or {}`` also covers a state file that holds a falsy value.
    state = read_json(STATE_PATH, default={}) or {}
    if status == "success":
        state["last_success"] = isoformat(now_utc())
    state["status"] = status
    write_json(STATE_PATH, state)
|
||||
|
||||
|
||||
def cmd_ingest(args) -> int:
    """Handle ``ingest``: fetch advisories incrementally.

    ``--since last-success`` (the default) resolves to the ``last_success``
    value stored in ``STATE_PATH``, falling back to ``"30d"``.

    The stored cursor is advanced only for unfiltered runs. A ``--system``
    run covers a subset of systems; advancing the shared cursor there would
    make the next unfiltered ingest skip the other systems' window.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)

    since = args.since
    if since == "last-success":
        state = read_json(STATE_PATH, default={}) or {}
        since = state.get("last_success", "30d")

    advisories, triage, failures, summary = pipeline(full_source_map, source_map, since, None, include_undated=False)

    if not args.system:
        _write_state("success")

    print(
        f"Ingested {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    )
    return 0
|
||||
|
||||
|
||||
def cmd_hotlane(args) -> int:
    """Handle ``hotlane``: sync only hot advisories from the last day.

    The run uses a fixed ``"1d"`` window and ``hotlane_only=True``, so it is
    not a complete ingest. It therefore records its own ``last_hotlane``
    timestamp and leaves ``last_success`` untouched; previously it advanced
    ``last_success``, which made a later ``ingest --since last-success`` skip
    every non-hot advisory published before the hotlane run.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    advisories, triage, failures, summary = pipeline(
        full_source_map,
        source_map,
        "1d",
        None,
        include_undated=False,
        hotlane_only=True,
    )

    # Written directly (not via _write_state) so the ingest cursor is not moved.
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    state = read_json(STATE_PATH, default={}) or {}
    state["last_hotlane"] = isoformat(now_utc())
    state["status"] = "success"
    write_json(STATE_PATH, state)

    print(
        f"Hotlane synced {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    )
    return 0
|
||||
|
||||
|
||||
def cmd_reconcile(args) -> int:
    """Handle ``reconcile``: re-run the pipeline over a fixed ``"30d"`` window.

    The stored cursor is advanced only for unfiltered runs. A ``--system``
    run covers a subset of systems, and advancing the shared ``last_success``
    there would hide the other systems' pending window from the next ingest.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)
    advisories, triage, failures, summary = pipeline(full_source_map, source_map, "30d", None, include_undated=False)

    if not args.system:
        _write_state("success")

    print(
        f"Reconciled {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
    )
    return 0
|
||||
|
||||
|
||||
def cmd_backfill(args) -> int:
    """Handle ``backfill``: fetch historical advisories for a tier.

    With ``--dry-run`` the candidates are collected, normalised and routed
    but nothing is rendered; only the counts are printed.

    Returns:
        Always ``0``.
    """
    full_source_map = load_source_map()
    source_map = _filter_source_map(full_source_map, args.system)

    if not args.dry_run:
        advisories, triage, failures, summary = pipeline(
            full_source_map,
            source_map,
            "",
            args.tier,
            include_undated=True,
            hotlane_only=args.hotlane_only,
        )
        print(
            f"Backfilled {len(advisories)} advisories, new {summary['new_count']}, updated {summary['updated_count']}, triage {len(triage)}, failures {len(failures)}"
        )
        return 0

    # NOTE(review): the dry run passes since_dt=None for every tier, whereas
    # pipeline() does so only for "history-full" (other tiers go through
    # parse_since("", default_days=30)). Confirm the preview window matches
    # the real run for "rolling-24m".
    candidates, failures = collect_candidates(source_map, since_dt=None, tier=args.tier, include_undated=True)
    advisories, triage = normalize_candidates(candidates)
    advisories = route_advisories(source_map, advisories)
    if args.hotlane_only:
        advisories, triage = _select_hotlane(advisories, triage)
    print(
        f"Dry run backfill tier={args.tier}: candidates={len(candidates)} advisories={len(advisories)} triage={len(triage)} failures={len(failures)}"
    )
    return 0
|
||||
|
||||
|
||||
def cmd_open_pr(args) -> int:
    """Handle ``open-pr``: call ``open_pr`` and print the message it returns.

    Returns:
        Always ``0``.
    """
    outcome = open_pr(base_branch=args.base, dry_run=args.dry_run)
    print(outcome)
    return 0
|
||||
|
||||
|
||||
def main() -> int:
    """Parse the command line and dispatch to the chosen sub-command.

    Returns:
        The integer returned by the sub-command handler.
    """
    parser = argparse.ArgumentParser(description="Websafe threat intel automation")
    subparsers = parser.add_subparsers(dest="command", required=True)

    def register(name: str, help_text: str, handler):
        """Create a sub-parser and bind its handler as ``args.func``."""
        sub = subparsers.add_parser(name, help=help_text)
        sub.set_defaults(func=handler)
        return sub

    backfill = register("backfill", "Fetch historical advisories", cmd_backfill)
    backfill.add_argument("--tier", choices=["history-full", "rolling-24m"], required=True)
    backfill.add_argument("--dry-run", action="store_true")
    backfill.add_argument("--hotlane-only", action="store_true")
    backfill.add_argument("--system", action="append")

    ingest = register("ingest", "Fetch incremental advisories", cmd_ingest)
    ingest.add_argument("--since", default="last-success")
    ingest.add_argument("--system", action="append")

    # These sub-commands take only the repeatable --system filter.
    system_only_commands = (
        ("hotlane", "Fetch only KEV / in-the-wild / critical updates", cmd_hotlane),
        ("reconcile", "Reconcile the last 30 days of updates", cmd_reconcile),
        ("render", "Render structure and secure-code pages", cmd_render),
        ("validate", "Validate generated content", cmd_validate),
    )
    for name, help_text, handler in system_only_commands:
        register(name, help_text, handler).add_argument("--system", action="append")

    open_pr_parser = register("open-pr", "Create Gitea PR from current changes", cmd_open_pr)
    open_pr_parser.add_argument("--base", default="main")
    open_pr_parser.add_argument("--dry-run", action="store_true")

    args = parser.parse_args()
    return args.func(args)
|
||||
|
||||
|
||||
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
70
scripts/intel/models.py
普通文件
70
scripts/intel/models.py
普通文件
@@ -0,0 +1,70 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class Candidate:
    """One raw advisory observation as reported by a single source.

    The first nine fields are required; the rest default to "unknown" or
    empty values. Annotations use the built-in generic / ``X | None``
    spellings, which stay unevaluated under
    ``from __future__ import annotations``.
    """

    # Which system the observation is about.
    system_id: str
    display_name: str
    category: str
    advisory_mode: str

    # Which source reported it.
    source_kind: str
    source_name: str
    source_confidence: str
    source_url: str

    # Advisory content.
    title: str
    published_at: str | None = None
    updated_at: str | None = None
    summary: str = ""
    severity: str = "unknown"
    cvss_score: float | None = None
    exploit_status: str = "unknown"

    # Identifiers.
    aliases: list[str] = field(default_factory=list)
    cve_ids: list[str] = field(default_factory=list)
    ghsa_ids: list[str] = field(default_factory=list)
    osv_ids: list[str] = field(default_factory=list)

    # Version and package details.
    affected_versions: list[str] = field(default_factory=list)
    fixed_versions: list[str] = field(default_factory=list)
    package_name: str | None = None

    # Additional links and the source's original payload.
    references: list[str] = field(default_factory=list)
    raw: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the candidate as a plain dict via ``dataclasses.asdict``."""
        return asdict(self)
|
||||
|
||||
|
||||
@dataclass
class AdvisoryRecord:
    """One merged advisory as stored in the registry.

    Every field up to ``status`` is required; ``triage_reasons`` and
    ``metadata`` default to empty containers. Annotations use the built-in
    generic / ``X | None`` spellings, which stay unevaluated under
    ``from __future__ import annotations``.
    """

    # Identity and system context.
    canonical_id: str
    system_id: str
    display_name: str
    category: str
    advisory_mode: str

    # Advisory content.
    title: str
    summary: str
    published_at: str | None
    updated_at: str | None
    severity: str
    cvss_score: float | None
    exploit_status: str

    # Source provenance.
    source_confidence: str
    official_source_url: str | None
    secondary_source_urls: list[str]

    # Identifiers.
    aliases: list[str]
    cve_ids: list[str]
    ghsa_ids: list[str]
    osv_ids: list[str]

    # Version and package details.
    affected_versions: list[str]
    fixed_versions: list[str]
    package_name: str | None

    # Rendering and workflow fields.
    render_markdown: bool
    case_path: str | None
    secure_code_topics: list[str]
    status: str
    triage_reasons: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain dict via ``dataclasses.asdict``."""
        return asdict(self)
|
||||
126
scripts/intel/normalize.py
普通文件
126
scripts/intel/normalize.py
普通文件
@@ -0,0 +1,126 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, Iterable, List, Tuple
|
||||
|
||||
from intel.models import AdvisoryRecord, Candidate
|
||||
from intel.utils import best_severity, short_hash, unique
|
||||
|
||||
|
||||
CONFIDENCE_ORDER = {
|
||||
"official": 4,
|
||||
"ecosystem-authority": 3,
|
||||
"research": 2,
|
||||
"triage-only": 1,
|
||||
}
|
||||
|
||||
|
||||
def _best_confidence(values: Iterable[str]) -> str:
|
||||
ordered = sorted(values, key=lambda value: CONFIDENCE_ORDER.get(value, 0), reverse=True)
|
||||
return next((value for value in ordered if value), "triage-only")
|
||||
|
||||
|
||||
def canonical_key(candidate: Candidate) -> str:
    """Build the ``<system_id>::<identifier>`` key used to group candidates.

    The identifier is the first non-empty value among the candidate's CVE,
    GHSA, OSV ids and aliases (in that order). If there is none, it is
    ``short_hash(title, source_url)``.

    The key later becomes ``canonical_id``, which is joined into registry
    file paths, and the identifiers come from external feeds. Path separators
    in the identifier are therefore replaced with ``-`` so a crafted alias
    cannot point outside the registry directory. Identifiers without ``/`` or
    ``\\`` are returned unchanged.
    """
    identifiers = candidate.cve_ids + candidate.ghsa_ids + candidate.osv_ids + candidate.aliases
    alias = next((value for value in identifiers if value), None)
    if alias is None:
        return f"{candidate.system_id}::{short_hash(candidate.title, candidate.source_url)}"

    safe_alias = alias.replace("/", "-").replace("\\", "-")
    return f"{candidate.system_id}::{safe_alias}"
|
||||
|
||||
|
||||
def normalize_candidates(candidates: List[Candidate]) -> Tuple[List[AdvisoryRecord], List[Dict[str, Any]]]:
    """Merge candidates that share a canonical key into one advisory each.

    Candidates are bucketed by ``canonical_key``. For every bucket (in sorted
    key order) the highest-confidence candidate is the "lead" and supplies
    the descriptive fields. Identifier, version and reference lists are
    merged across the bucket with ``unique``.

    A bucket is flagged for triage when its best confidence is below
    ``official`` / ``ecosystem-authority``, when it has no URL from such a
    source, or when it carries no affected/fixed versions.

    Fix over the previous version: ``secondary_source_urls`` is now every
    merged reference except the chosen official URL. It used to be
    ``references[1:]``, which dropped the first reference and repeated the
    official URL whenever the official source was not first in the bucket.

    Returns:
        ``(advisories, triage)``: one ``AdvisoryRecord`` per bucket, plus one
        triage dict for every bucket that has triage reasons.
    """
    trusted_levels = {"official", "ecosystem-authority"}

    buckets: Dict[str, List[Candidate]] = defaultdict(list)
    for candidate in candidates:
        buckets[canonical_key(candidate)].append(candidate)

    advisories: List[AdvisoryRecord] = []
    triage: List[Dict[str, Any]] = []

    for key, items in sorted(buckets.items()):
        canonical_id = key.replace("::", "--")
        # max() keeps the first candidate among equally ranked ones, same as
        # taking the head of a stable descending sort.
        lead = max(items, key=lambda item: CONFIDENCE_ORDER.get(item.source_confidence, 0))
        confidence = _best_confidence(item.source_confidence for item in items)

        aliases = unique(alias for item in items for alias in item.aliases)
        cve_ids = unique(value for item in items for value in item.cve_ids)
        ghsa_ids = unique(value for item in items for value in item.ghsa_ids)
        osv_ids = unique(value for item in items for value in item.osv_ids)
        affected = unique(value for item in items for value in item.affected_versions)
        fixed = unique(value for item in items for value in item.fixed_versions)
        references = unique([item.source_url for item in items] + [ref for item in items for ref in item.references])

        # Scalar fields: the first candidate in the bucket that has a value wins.
        published = next((item.published_at for item in items if item.published_at), None)
        updated = next((item.updated_at for item in items if item.updated_at), published)
        severity = best_severity(item.severity for item in items)
        cvss = next((item.cvss_score for item in items if item.cvss_score is not None), None)
        exploit_status = next(
            (item.exploit_status for item in items if item.exploit_status and item.exploit_status != "unknown"),
            "unknown",
        )

        official_refs = [
            item.source_url
            for item in items
            if item.source_confidence in trusted_levels and item.source_url
        ]

        # Prefer a trusted URL; otherwise fall back to the first reference.
        if official_refs:
            official_url = official_refs[0]
        else:
            official_url = references[0] if references else None
        # Assumes ``unique`` is an order-preserving de-duplication; under that
        # assumption this equals references[1:] when the official URL is first.
        secondary_urls = [ref for ref in references if ref != official_url]

        triage_reasons = []
        if confidence not in trusted_levels:
            triage_reasons.append("best source confidence below registry threshold")
        if not official_refs:
            triage_reasons.append("no official or ecosystem-authority source URL")
        if not (affected or fixed):
            triage_reasons.append("missing affected/fixed version details")

        status = "generated"
        if triage_reasons:
            status = "triage"
            triage.append(
                {
                    "canonical_id": canonical_id,
                    "system_id": lead.system_id,
                    "title": lead.title,
                    "reasons": triage_reasons,
                    "candidate_count": len(items),
                    "references": references,
                }
            )

        advisories.append(
            AdvisoryRecord(
                canonical_id=canonical_id,
                system_id=lead.system_id,
                display_name=lead.display_name,
                category=lead.category,
                advisory_mode=lead.advisory_mode,
                title=lead.title,
                summary=lead.summary,
                published_at=published,
                updated_at=updated,
                severity=severity,
                cvss_score=cvss,
                exploit_status=exploit_status,
                source_confidence=confidence,
                official_source_url=official_url,
                secondary_source_urls=secondary_urls,
                aliases=aliases,
                cve_ids=cve_ids,
                ghsa_ids=ghsa_ids,
                osv_ids=osv_ids,
                affected_versions=affected,
                fixed_versions=fixed,
                package_name=lead.package_name,
                render_markdown=False,
                case_path=None,
                secure_code_topics=[],
                status=status,
                triage_reasons=triage_reasons,
                metadata={
                    "source_names": unique(item.source_name for item in items),
                    "source_kinds": unique(item.source_kind for item in items),
                    "candidate_count": len(items),
                },
            )
        )

    return advisories, triage
|
||||
98
scripts/intel/pr.py
普通文件
98
scripts/intel/pr.py
普通文件
@@ -0,0 +1,98 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from intel.config import GENERATED_DIR, ROOT
|
||||
from intel.utils import read_json, run
|
||||
|
||||
|
||||
PR_PATHS = [
|
||||
"README.md",
|
||||
"05-defense/secure-code",
|
||||
"07-framework-security",
|
||||
"08-threat-intel",
|
||||
"requirements-intel.txt",
|
||||
"scripts/intel",
|
||||
]
|
||||
|
||||
|
||||
def create_branch_name() -> str:
    """Return ``codex/intel-<YYYYMMDD-HHMM>`` built from ``datetime.now()``."""
    stamp = format(datetime.now(), "%Y%m%d-%H%M")
    return f"codex/intel-{stamp}"
|
||||
|
||||
|
||||
def _parse_origin() -> Optional[dict]:
    """Split the ``origin`` remote URL into host, owner and repository.

    Only ``https://<host>/<owner>/<repo>[.git][/]`` remotes are recognised.
    Changes from the previous pattern:

    * the whole URL must match, so a repository name containing dots
      (``my.repo.git``) is no longer cut at the first dot;
    * ``user:token@`` userinfo is skipped rather than captured into ``host``,
      which callers interpolate into API URLs;
    * URLs with extra path segments return ``None`` instead of being split
      into the wrong owner/repo.

    Returns:
        ``{"host", "owner", "repo", "url"}``, or ``None`` when git fails or
        the URL has another shape. ``url`` is the remote URL exactly as git
        reported it.
    """
    result = run(["git", "-C", str(ROOT), "remote", "get-url", "origin"], check=False)
    if result.returncode != 0:
        return None

    url = result.stdout.strip()
    # Lazy repo group so an optional trailing ".git" is not part of the name.
    match = re.fullmatch(r"https://(?:[^/@]+@)?([^/]+)/([^/]+)/([^/]+?)(?:\.git)?/?", url)
    if not match:
        return None
    host, owner, repo = match.groups()
    return {"host": host, "owner": owner, "repo": repo, "url": url}
|
||||
|
||||
|
||||
def _changed_paths() -> list[str]:
    """Return the non-blank ``git status --short`` lines for ``PR_PATHS``.

    Trailing whitespace is stripped from each line; leading status columns
    are kept as git printed them.
    """
    status = run(["git", "-C", str(ROOT), "status", "--short", "--", *PR_PATHS], check=False)
    entries = []
    for raw_line in status.stdout.splitlines():
        if raw_line.strip():
            entries.append(raw_line.rstrip())
    return entries
|
||||
|
||||
|
||||
def open_pr(base_branch: str = "main", dry_run: bool = False) -> str:
    """Commit intel-related changes to a new branch and open a Gitea PR.

    Args:
        base_branch: Branch the pull request targets.
        dry_run: When true, only report the branch name and up to 40 changed
            paths; nothing is committed or pushed.

    Returns:
        A human-readable status message.

    Raises:
        RuntimeError: If the origin remote URL cannot be parsed.
    """
    origin = _parse_origin()
    if not origin:
        raise RuntimeError("Unable to parse origin remote URL")

    changed = _changed_paths()
    if not changed:
        return "No intel-related changes to submit"

    branch = create_branch_name()
    if dry_run:
        preview = "\n".join(f"- {line}" for line in changed[:40])
        return f"Dry run only; would create branch {branch} with these paths:\n{preview}"

    # Branch, stage, commit and push, in that order.
    git = ["git", "-C", str(ROOT)]
    git_steps = (
        ["checkout", "-b", branch],
        ["add", "--", *PR_PATHS],
        ["commit", "-m", f"intel: automated advisory ingest {branch}"],
        ["push", "-u", "origin", branch],
    )
    for step in git_steps:
        run(git + step)

    token = os.environ.get("GITEA_TOKEN")
    if not token:
        return f"Pushed branch {branch}, but GITEA_TOKEN is not set; PR not created"

    # The PR body is built from the last run summary, if one can be read.
    summary = read_json(GENERATED_DIR / "run-summary.json", default={}) or {}
    body_lines = [
        "Automated advisory ingest update.",
        "",
        f"- New advisories: {summary.get('new_count', 0)}",
        f"- Updated advisories: {summary.get('updated_count', 0)}",
        f"- Triage count: {summary.get('triage_count', 0)}",
        f"- Failure count: {len(summary.get('failures', []))}",
    ]
    touched = summary.get("systems_touched")
    if touched:
        body_lines.append(f"- Systems touched: {', '.join(touched)}")
    failed = summary.get("failures")
    if failed:
        body_lines += ["", "Failed source adapters:"]
        body_lines += [f"- {failure}" for failure in failed]

    endpoint = f"https://{origin['host']}/api/v1/repos/{origin['owner']}/{origin['repo']}/pulls"
    response = requests.post(
        endpoint,
        headers={"Authorization": f"token {token}", "Content-Type": "application/json"},
        json={
            "title": f"Intel ingest {branch}",
            "head": branch,
            "base": base_branch,
            "body": "\n".join(body_lines),
        },
        timeout=30,
    )
    response.raise_for_status()

    created = response.json()
    pr_url = created.get("html_url") or created.get("url")
    return f"Created PR: {pr_url}"
|
||||
547
scripts/intel/render.py
普通文件
547
scripts/intel/render.py
普通文件
@@ -0,0 +1,547 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List
|
||||
|
||||
from intel.config import FRAMEWORK_ROOT, GENERATED_DIR, REGISTRY_ROOT, ROOT, SECURE_CODE_ROOT, SYSTEMS_DIR, TRIAGE_DIR
|
||||
from intel.models import AdvisoryRecord
|
||||
from intel.utils import ensure_dir, isoformat, now_utc, write_json, write_text
|
||||
|
||||
|
||||
UTC = timezone.utc
|
||||
LANGUAGES = [
|
||||
"javascript-typescript",
|
||||
"nodejs",
|
||||
"java",
|
||||
"php",
|
||||
"python",
|
||||
"ruby",
|
||||
"csharp",
|
||||
"go",
|
||||
]
|
||||
|
||||
|
||||
TOPIC_DESCRIPTIONS = {
|
||||
"xss-output-encoding": "对不可信输入做上下文输出编码,避免 HTML、属性和脚本上下文执行。",
|
||||
"dom-sink-hardening": "限制 `innerHTML`、模板拼接和 DOM sink 的直接写入。",
|
||||
"csp-trusted-types": "用 CSP 和 Trusted Types 缩小前端执行面。",
|
||||
"token-cookie-storage": "避免把敏感令牌长期暴露在可读浏览器存储中。",
|
||||
"authz-server-side-recheck": "前置代理或 middleware 不能替代服务端最终鉴权。",
|
||||
"ssrf-url-validation": "对 URL、协议、IP 和重定向做 allowlist 校验。",
|
||||
"request-smuggling-boundary": "统一代理层和应用层的请求边界解释。",
|
||||
"path-traversal-guard": "标准化路径并限制访问根目录。",
|
||||
"file-upload-validation": "校验文件类型、名称、存储位置和执行权限。",
|
||||
"plugin-extension-trust-policy": "限制第三方插件、模块和主题的信任边界。",
|
||||
"dependency-upgrade-policy": "用自动化升级、锁版本和审查策略降低供应链风险。",
|
||||
"proxy-trust-boundary": "只信任明确代理并限制头部透传。",
|
||||
"deserialization-safety": "避免对不可信数据做危险反序列化。",
|
||||
"template-injection-guard": "模板上下文中禁用危险表达式执行。",
|
||||
}
|
||||
|
||||
TOPIC_SCENARIOS = {
|
||||
"xss-output-encoding": "适用于模板输出、服务端渲染片段和后台管理界面回显场景。",
|
||||
"dom-sink-hardening": "适用于前端模板拼接、Markdown 渲染器和富文本预览逻辑。",
|
||||
"csp-trusted-types": "适用于高风险前端应用、管理端和需要限制脚本执行面的页面。",
|
||||
"token-cookie-storage": "适用于浏览器端会话、管理接口令牌和单页应用认证态。",
|
||||
"authz-server-side-recheck": "适用于代理层放行、路由守卫和后端最终授权重新确认。",
|
||||
"ssrf-url-validation": "适用于 webhook、URL 导入、远程图片抓取和插件联动调用。",
|
||||
"request-smuggling-boundary": "适用于代理链、WAF、CDN 和应用服务器之间的请求解析边界。",
|
||||
"path-traversal-guard": "适用于下载、导入、附件预览和主题/模板读取路径。",
|
||||
"file-upload-validation": "适用于媒体上传、插件安装、主题导入和日志附件接收。",
|
||||
"plugin-extension-trust-policy": "适用于插件市场、主题仓库、第三方扩展和模块化系统。",
|
||||
"dependency-upgrade-policy": "适用于 lockfile、SBOM、CI 审查和供应链更新节奏治理。",
|
||||
"proxy-trust-boundary": "适用于真实 IP 透传、认证头转发和反向代理旁路风险。",
|
||||
"deserialization-safety": "适用于缓存、任务队列、对象恢复和跨服务消息传递。",
|
||||
"template-injection-guard": "适用于 SSR、模板引擎、邮件渲染和后台自定义视图。",
|
||||
}
|
||||
|
||||
BAD_GOOD_SNIPPETS = {
|
||||
"javascript-typescript": (
|
||||
"const output = `<div>${userInput}</div>`;",
|
||||
"const output = `<div>${escapeHtml(userInput)}</div>`;",
|
||||
),
|
||||
"nodejs": (
|
||||
"res.send(`<div>${req.query.q}</div>`);",
|
||||
"res.send(`<div>${escapeHtml(req.query.q)}</div>`);",
|
||||
),
|
||||
"java": (
|
||||
"response.getWriter().write(\"<div>\" + value + \"</div>\");",
|
||||
"response.getWriter().write(\"<div>\" + HtmlUtils.htmlEscape(value) + \"</div>\");",
|
||||
),
|
||||
"php": (
|
||||
"echo \"<div>{$value}</div>\";",
|
||||
"echo '<div>' . htmlspecialchars($value, ENT_QUOTES, 'UTF-8') . '</div>';",
|
||||
),
|
||||
"python": (
|
||||
"return f\"<div>{value}</div>\"",
|
||||
"return f\"<div>{escape(value)}</div>\"",
|
||||
),
|
||||
"ruby": (
|
||||
"render inline: \"<div>#{value}</div>\"",
|
||||
"render inline: \"<div>#{ERB::Util.html_escape(value)}</div>\"",
|
||||
),
|
||||
"csharp": (
|
||||
"return Content($\"<div>{value}</div>\", \"text/html\");",
|
||||
"return Content($\"<div>{HtmlEncoder.Default.Encode(value)}</div>\", \"text/html\");",
|
||||
),
|
||||
"go": (
|
||||
"fmt.Fprintf(w, \"<div>%s</div>\", value)",
|
||||
"template.HTMLEscape(w, []byte(value))",
|
||||
),
|
||||
}
|
||||
|
||||
SOURCE_KIND_URLS = {
|
||||
"ghsa-global": "https://github.com/advisories",
|
||||
"osv-batch": "https://osv.dev/",
|
||||
"nvd-search": "https://nvd.nist.gov/vuln/search",
|
||||
"kev-json": "https://www.cisa.gov/known-exploited-vulnerabilities-catalog",
|
||||
"rss-feed": "https://www.rssboard.org/rss-specification",
|
||||
}
|
||||
|
||||
TARGET_TYPES = ["lab-local", "lab-public", "authorized-third-party"]
|
||||
MINIMAL_VALIDATION_GUIDANCE = "最小化验证、只读探测、可审计回显、受控注入。"
|
||||
FORBIDDEN_SCENARIOS = [
|
||||
"无归属证明或无明确授权的公网目标",
|
||||
"知名公共网站或与测试无关的第三方资产",
|
||||
"会造成持久破坏、数据越权下载或不可回滚影响的动作",
|
||||
]
|
||||
|
||||
|
||||
def _recent_count(items: Iterable[AdvisoryRecord], days: int = 30) -> int:
|
||||
cutoff = now_utc() - timedelta(days=days)
|
||||
total = 0
|
||||
for item in items:
|
||||
for stamp in (item.updated_at, item.published_at):
|
||||
if not stamp:
|
||||
continue
|
||||
try:
|
||||
dt = datetime.fromisoformat(stamp.replace("Z", "+00:00")).astimezone(UTC)
|
||||
except ValueError:
|
||||
continue
|
||||
if dt >= cutoff:
|
||||
total += 1
|
||||
break
|
||||
return total
|
||||
|
||||
|
||||
def _group_name(output_dir: str) -> str:
|
||||
return Path(output_dir).parts[1]
|
||||
|
||||
|
||||
def _abs_repo_path(*parts: str) -> str:
    """Join path fragments onto ``ROOT`` and return the result as a string.

    Empty fragments are skipped. Every other fragment is split into its own
    path components before joining.
    """
    segments = [segment for part in parts if part for segment in Path(part).parts]
    return str(ROOT.joinpath(*segments))
|
||||
|
||||
|
||||
def _source_reference(source: Dict[str, Any]) -> str:
|
||||
url = source.get("url") or SOURCE_KIND_URLS.get(source.get("kind", ""))
|
||||
qualifiers = []
|
||||
if source.get("ecosystem"):
|
||||
qualifiers.append(f"ecosystem={source['ecosystem']}")
|
||||
if source.get("keyword"):
|
||||
qualifiers.append(f"keyword={source['keyword']}")
|
||||
if source.get("advisory_mode"):
|
||||
qualifiers.append(f"mode={source['advisory_mode']}")
|
||||
suffix = f" ({'; '.join(qualifiers)})" if qualifiers else ""
|
||||
if url:
|
||||
return f"`{source['confidence']}` [{source['name']}]({url}){suffix}"
|
||||
return f"`{source['confidence']}` {source['name']}{suffix}"
|
||||
|
||||
|
||||
def _clear_json_dir(path: Path) -> None:
    """Call ``ensure_dir(path)``, then delete every ``*.json`` directly inside it.

    Only top-level matches are removed; subdirectories are not traversed.
    """
    ensure_dir(path)
    stale_files = path.glob("*.json")
    for stale in stale_files:
        stale.unlink()
|
||||
|
||||
|
||||
def render_system_scaffolding(source_map: Dict[str, Any], advisories: List[AdvisoryRecord]) -> None:
    """Write the Markdown scaffolding under ``FRAMEWORK_ROOT``.

    For every system in ``source_map["systems"]`` this writes an ``INDEX.md``
    (metadata, sources, advisory table) and a ``README.md`` into
    ``FRAMEWORK_ROOT/<group>/<system_id>`` and ensures a ``cases``
    subdirectory exists. It then writes one ``README.md`` per group and a
    root ``README.md``.

    Args:
        source_map: Mapping with a ``"systems"`` list of system definitions.
        advisories: Advisory records, matched to systems by ``system_id``.
    """
    # Bucket advisories by the system they belong to.
    grouped: Dict[str, List[AdvisoryRecord]] = defaultdict(list)
    for advisory in advisories:
        grouped[advisory.system_id].append(advisory)

    # group name -> systems in that group; filled while rendering each system.
    groups: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for system in source_map["systems"]:
        groups[_group_name(system["output_dir"])].append(system)
        # NOTE(review): files go to FRAMEWORK_ROOT/<group>/<system_id>, while the
        # links below are built from output_dir; these presumably coincide — confirm.
        system_dir = FRAMEWORK_ROOT / _group_name(system["output_dir"]) / system["system_id"]
        ensure_dir(system_dir / "cases")

        # Newest first by published_at; a missing date sorts as "".
        items = sorted(grouped.get(system["system_id"], []), key=lambda item: item.published_at or "", reverse=True)
        markdown_count = len([item for item in items if item.render_markdown and item.case_path])
        index_lines = [
            f"# {system['display_name']}",
            "",
            "> `LAB ONLY` | `AUTHORIZED TARGETS ONLY` | 自动生成索引",
            "",
            f"- 系统 ID: `{system['system_id']}`",
            f"- 分类: `{system['category']}`",
            f"- 覆盖策略: `{system['tier']}`",
            f"- 总案例数: `{len(items)}`",
            f"- 近 30 天新增/更新: `{_recent_count(items)}`",
            f"- 重点 Markdown 案例数: `{markdown_count}`",
            f"- 最近渲染时间: `{isoformat(now_utc())}`",
            "",
            "## 目标约束",
            "",
            f"- 适用目标类型: `{', '.join(TARGET_TYPES)}`",
            "- 是否允许公网验证: `yes, but ownership or authorization is required`",
            "- 授权前提: 资产归属可证明,或已取得书面/明确授权。",
            f"- 最小化验证方式: {MINIMAL_VALIDATION_GUIDANCE}",
            f"- 禁止场景: {';'.join(FORBIDDEN_SCENARIOS)}",
            "",
            "## 来源",
            "",
        ]
        # Sources are listed bucket by bucket, in this fixed order.
        for bucket in ("official_sources", "ecosystem_sources", "research_sources"):
            for source in system.get(bucket, []):
                index_lines.append(f"- {_source_reference(source)}")
        index_lines.extend(
            [
                "",
                "## 案例列表",
                "",
                "| 标题 | 严重度 | 状态 | 来源置信度 | 更新时间 | 案例页 |",
                "|------|--------|------|------------|----------|--------|",
            ]
        )
        if items:
            for item in items:
                case_link = f"[link]({_abs_repo_path(item.case_path)})" if item.case_path else "-"
                timestamp = item.updated_at or item.published_at or ""
                # NOTE(review): the title is inserted verbatim; a "|" in it would
                # split the table cell.
                index_lines.append(
                    f"| {item.title} | `{item.severity}` | `{item.status}` | `{item.source_confidence}` | `{timestamp}` | {case_link} |"
                )
        else:
            # Placeholder row so the table is never empty.
            index_lines.append("| No advisories yet | `n/a` | `empty` | `n/a` | `n/a` | - |")
        write_text(system_dir / "INDEX.md", "\n".join(index_lines))

        system_registry_path = _abs_repo_path("08-threat-intel", "registry", "systems", f"{system['system_id']}.json")
        readme_lines = [
            f"# {system['display_name']}",
            "",
            "> `LAB ONLY` | `AUTHORIZED TARGETS ONLY`",
            "",
            f"- 分类: `{system['category']}`",
            f"- 覆盖层级: `{system['tier']}`",
            f"- Advisory 模式: {', '.join(system.get('advisory_modes', []))}",
            f"- 输出目录: `{system['output_dir']}`",
            f"- 修复主题: {', '.join(system.get('secure_code_topics', []))}",
            f"- 适用目标类型: `{', '.join(TARGET_TYPES)}`",
            "- 是否允许公网验证: `yes, but only for owned or authorized targets`",
            f"- 最小化验证方式: {MINIMAL_VALIDATION_GUIDANCE}",
            f"- 禁止场景: {';'.join(FORBIDDEN_SCENARIOS)}",
            "",
            f"- 自动索引: [INDEX.md]({_abs_repo_path(system['output_dir'], 'INDEX.md')})",
            f"- Registry 统计: [{system['system_id']}.json]({system_registry_path})",
        ]
        write_text(system_dir / "README.md", "\n".join(readme_lines))

    # One README per group, listing its systems by case-insensitive display name.
    for group, systems in groups.items():
        lines = [
            f"# {group}",
            "",
            "> 自动生成系统分组索引",
            "",
            f"- 系统数量: `{len(systems)}`",
            "- 允许范围: `lab-local`, `lab-public`, `authorized-third-party`",
            "",
        ]
        for system in sorted(systems, key=lambda item: item["display_name"].lower()):
            lines.append(f"- [{system['display_name']}]({_abs_repo_path(system['output_dir'], 'README.md')})")
        write_text(FRAMEWORK_ROOT / group / "README.md", "\n".join(lines))

    # Root README; the five group links are fixed, not derived from `groups`.
    root_lines = [
        "# 主流开源 Web 系统安全",
        "",
        "> `LAB ONLY` | `AUTHORIZED TARGETS ONLY`",
        "",
        f"- 系统总数: `{len(source_map['systems'])}`",
        "- 覆盖语境: 授权攻防实验、验证性注入、最小化验证、案例映射。",
        "- 不适用: 未授权公网目标、泛互联网枚举、默认生产推荐基线。",
        "",
        f"- [cms]({_abs_repo_path('07-framework-security', 'cms', 'README.md')})",
        f"- [ecommerce]({_abs_repo_path('07-framework-security', 'ecommerce', 'README.md')})",
        f"- [frameworks]({_abs_repo_path('07-framework-security', 'frameworks', 'README.md')})",
        f"- [servers]({_abs_repo_path('07-framework-security', 'servers', 'README.md')})",
        f"- [platforms]({_abs_repo_path('07-framework-security', 'platforms', 'README.md')})",
    ]
    write_text(FRAMEWORK_ROOT / "README.md", "\n".join(root_lines))
|
||||
|
||||
|
||||
def render_case_pages(advisories: List[AdvisoryRecord]) -> None:
    """Write one Markdown case page per advisory flagged for rendering.

    Advisories with ``render_markdown`` unset or without a ``case_path`` are
    skipped. Each page is a front-matter block followed by event, source,
    lab and remediation sections, written to ``ROOT / item.case_path``.

    Args:
        advisories: Advisory records to consider.
    """
    for item in advisories:
        if not item.render_markdown or not item.case_path:
            continue
        lines = [
            "---",
            # chr(34) is '"' and chr(39) is "'": double quotes in the title are
            # swapped for single quotes so the quoted value is not cut short.
            f'title: "{item.title.replace(chr(34), chr(39))}"',
            f'system_id: "{item.system_id}"',
            f'category: "{item.category}"',
            f'advisory_mode: "{item.advisory_mode}"',
            f'published_date: "{item.published_at or ""}"',
            f'updated_date: "{item.updated_at or item.published_at or ""}"',
            f'severity: "{item.severity}"',
            f'exploit_status: "{item.exploit_status}"',
            f'source_confidence: "{item.source_confidence}"',
            'target_types:',
            ' - "lab-local"',
            ' - "lab-public"',
            ' - "authorized-third-party"',
            'allow_public_validation: "yes, with ownership or explicit authorization"',
            'authorization_prerequisite: "asset ownership proof or explicit written authorization"',
            'minimal_validation: "read-only probe, controlled payload, reversible test"',
            "aliases:",
        ]
        # NOTE(review): the list values below are emitted without any quote
        # escaping (only the title is sanitised above).
        for alias in item.aliases:
            lines.append(f' - "{alias}"')
        lines.append("affected_versions:")
        # The front matter lists at most 20 versions per field.
        for version in item.affected_versions[:20]:
            lines.append(f' - "{version}"')
        lines.append("fixed_versions:")
        for version in item.fixed_versions[:20]:
            lines.append(f' - "{version}"')
        lines.append("secure_code_topics:")
        for topic in item.secure_code_topics:
            lines.append(f' - "{topic}"')
        lines.extend(
            [
                f'primary_source: "{item.official_source_url or ""}"',
                "---",
                "",
                f"# {item.title}",
                "",
                "## 事件层",
                "",
                f"- Canonical ID: `{item.canonical_id}`",
                f"- 系统: `{item.system_id}`",
                f"- 严重度: `{item.severity}`",
                f"- 来源置信度: `{item.source_confidence}`",
                f"- 官方主源: {item.official_source_url or '-'}",
                # The body shows at most 10 versions, fewer than the front matter.
                f"- 影响版本: `{', '.join(item.affected_versions[:10]) or 'unknown'}`",
                f"- 修复版本: `{', '.join(item.fixed_versions[:10]) or 'unknown'}`",
                "",
                "## 其他来源",
                "",
            ]
        )
        if item.secondary_source_urls:
            for ref in item.secondary_source_urls[:20]:
                lines.append(f"- {ref}")
        else:
            lines.append("- 无额外来源")
        lines.extend(
            [
                "",
                "## 实验层",
                "",
                "- 仅用于自有资产、测试环境或已明确授权目标。",
                "- 允许公网可达目标,但必须满足资产归属或明确授权前提。",
                f"- 最小化验证方式: {MINIMAL_VALIDATION_GUIDANCE}",
                "- 若该案例涉及插件、模块或扩展,应同时检查供应链与升级策略。",
                f"- 禁止场景: {';'.join(FORBIDDEN_SCENARIOS)}",
                "",
                "## 修复示例",
                "",
            ]
        )
        # Link only the secure-code pages that already exist on disk, so this
        # section can be empty.
        for topic in item.secure_code_topics:
            for language in LANGUAGES:
                path = SECURE_CODE_ROOT / language / f"{topic}.md"
                if path.exists():
                    lines.append(f"- [{language}:{topic}]({_abs_repo_path('05-defense', 'secure-code', language, f'{topic}.md')})")
        write_text(ROOT / item.case_path, "\n".join(lines))
|
||||
|
||||
|
||||
def render_registry(source_map: Dict[str, Any], advisories: List[AdvisoryRecord], triage: List[Dict[str, Any]]) -> None:
    """Rebuild the JSON registry: advisories, triage items and per-system stats.

    The three registry directories are cleared of ``*.json`` files first.
    Then one file is written per advisory (named by ``canonical_id``), one
    per triage item, and one summary per system in ``source_map["systems"]``.

    Args:
        source_map: Mapping with a ``"systems"`` list of system definitions.
        advisories: Advisory records to serialise via ``to_dict()``.
        triage: Triage entries; each needs ``system_id`` and ``canonical_id``.
    """
    advisories_dir = REGISTRY_ROOT / "advisories"
    for target in (advisories_dir, REGISTRY_ROOT / "systems", TRIAGE_DIR):
        _clear_json_dir(target)

    records_by_system: Dict[str, List[AdvisoryRecord]] = defaultdict(list)
    for record in advisories:
        write_json(advisories_dir / f"{record.canonical_id}.json", record.to_dict())
        records_by_system[record.system_id].append(record)

    triage_by_system: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for entry in triage:
        triage_by_system[entry["system_id"]].append(entry)
        write_json(TRIAGE_DIR / f"{entry['canonical_id']}.json", entry)

    for system in source_map["systems"]:
        system_id = system["system_id"]
        records = records_by_system.get(system_id, [])
        summary = {
            "system_id": system_id,
            "display_name": system["display_name"],
            "category": system["category"],
            "tier": system["tier"],
            "total": len(records),
            "markdown_cases": sum(1 for record in records if record.case_path),
            "triage_count": len(triage_by_system.get(system_id, [])),
            # Lexicographic maximum of the ISO stamps; "" when there are none.
            "latest_update": max(
                [record.updated_at or record.published_at or "" for record in records],
                default="",
            ),
            "output_dir": system["output_dir"],
            "secure_code_topics": system.get("secure_code_topics", []),
            # Canonical ids, newest published first.
            "items": [
                record.canonical_id
                for record in sorted(records, key=lambda record: record.published_at or "", reverse=True)
            ],
        }
        write_json(SYSTEMS_DIR / f"{system_id}.json", summary)
|
||||
|
||||
|
||||
def render_generated(
    source_map: Dict[str, Any],
    advisories: List[AdvisoryRecord],
    triage: List[Dict[str, Any]],
    failures: List[str],
    change_summary: Dict[str, Any] | None = None,
) -> None:
    """Write the generated reports into ``GENERATED_DIR``.

    Produces ``coverage-matrix.md`` (one table row per system, ordered by
    system id), ``latest-ingest.md`` (run counters plus any failures) and
    ``run-summary.json`` (the same counters as JSON).

    Args:
        source_map: Mapping with a ``"systems"`` list of system definitions.
        advisories: All advisory records of this run.
        triage: Triage entries; each needs a ``system_id`` key.
        failures: Failure descriptions, one per failed source adapter.
        change_summary: Optional counters (``new_count``, ``updated_count``,
            ``systems_touched``); missing keys fall back to 0 / ``[]``.
    """
    ensure_dir(GENERATED_DIR)
    systems_by_id = {entry["system_id"]: entry for entry in source_map["systems"]}
    if not change_summary:
        change_summary = {}

    triage_by_system: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for entry in triage:
        triage_by_system[entry["system_id"]].append(entry)

    records_by_system: Dict[str, List[AdvisoryRecord]] = defaultdict(list)
    for record in advisories:
        records_by_system[record.system_id].append(record)

    coverage_lines = [
        "# 覆盖矩阵",
        "",
        "| 系统 | 分类 | 覆盖策略 | 历史全量 | 近两年全量 | 全量 registry | 重点案例 Markdown | secure-code 关联 | 自动同步状态 | triage | 最近更新 |",
        "|------|------|----------|----------|------------|--------------|--------------------|------------------|--------------|--------|----------|",
    ]
    # Dict keys are unique, so sorting the ids gives the same order as
    # sorting the (id, system) pairs.
    for system_id in sorted(systems_by_id):
        system = systems_by_id[system_id]
        records = records_by_system.get(system_id, [])
        markdown_count = sum(1 for record in records if record.case_path)
        if records:
            sync_state = "seeded"
        else:
            sync_state = "scaffolded"
        recent = max([record.updated_at or record.published_at or "" for record in records], default="")
        cells = [
            f"{system['display_name']}",
            f"`{system['category']}`",
            f"`{system['tier']}`",
            f"`{'yes' if system['tier'] == 'history-full' else '-'}`",
            "`yes`",
            f"`{len(records)}`",
            f"`{markdown_count}`",
            f"`{len(system.get('secure_code_topics', []))}`",
            f"`{sync_state}`",
            f"`{len(triage_by_system.get(system_id, []))}`",
            f"`{recent}`",
        ]
        coverage_lines.append("| " + " | ".join(cells) + " |")
    write_text(GENERATED_DIR / "coverage-matrix.md", "\n".join(coverage_lines))

    markdown_total = sum(1 for record in advisories if record.case_path)
    new_count = change_summary.get("new_count", 0)
    updated_count = change_summary.get("updated_count", 0)

    latest_lines = [
        "# 最新同步摘要",
        "",
        f"- 渲染时间: `{isoformat(now_utc())}`",
        f"- 系统数量: `{len(source_map['systems'])}`",
        f"- Advisory 数量: `{len(advisories)}`",
        f"- 重点 Markdown 数量: `{markdown_total}`",
        f"- 新增记录: `{new_count}`",
        f"- 更新记录: `{updated_count}`",
        f"- Triage 数量: `{len(triage)}`",
        f"- 失败的 source adapter: `{len(failures)}`",
        "",
    ]
    if failures:
        latest_lines += ["## 失败列表", ""]
        latest_lines += [f"- {failure}" for failure in failures]
    write_text(GENERATED_DIR / "latest-ingest.md", "\n".join(latest_lines))

    run_summary = {
        "generated_at": isoformat(now_utc()),
        "system_count": len(source_map["systems"]),
        "advisory_count": len(advisories),
        "markdown_count": markdown_total,
        "new_count": new_count,
        "updated_count": updated_count,
        "systems_touched": change_summary.get("systems_touched", []),
        "triage_count": len(triage),
        "failures": failures,
    }
    write_json(GENERATED_DIR / "run-summary.json", run_summary)
|
||||
|
||||
|
||||
def render_secure_code(source_map: Dict[str, Any]) -> None:
    """Write the secure-code library pages under ``SECURE_CODE_ROOT``.

    Writes a root ``README.md`` / ``INDEX.md`` pair (identical content), and
    for every language in ``LANGUAGES`` an ``INDEX.md`` / ``README.md`` pair
    plus one ``<topic>.md`` page per entry in ``TOPIC_DESCRIPTIONS``.

    Args:
        source_map: Mapping with a ``"systems"`` list; each system may carry
            ``secure_code_topics`` that link it to topic pages.
    """
    systems = source_map["systems"]
    # topic -> display names of the systems that reference it.
    related = defaultdict(set)
    for system in systems:
        for topic in system.get("secure_code_topics", []):
            related[topic].add(system["display_name"])

    root_lines = [
        "# 安全编码修复库",
        "",
        "> `LAB ONLY` | 修复主题用于把实验发现映射回代码整改,不代表默认生产基线。",
        "",
        "- 语言范围: `javascript-typescript`, `nodejs`, `java`, `php`, `python`, `ruby`, `csharp`, `go`",
        "- 主题范围: 输出编码、DOM sink、CSP / Trusted Types、令牌存储、鉴权复核、SSRF、走私边界、路径穿越、文件上传、插件信任、依赖升级、代理信任、反序列化、模板注入。",
        "",
    ]
    for language in LANGUAGES:
        root_lines.append(f"- [{language}]({_abs_repo_path('05-defense', 'secure-code', language, 'README.md')})")
    # README.md and INDEX.md receive the same content.
    write_text(SECURE_CODE_ROOT / "README.md", "\n".join(root_lines))
    write_text(SECURE_CODE_ROOT / "INDEX.md", "\n".join(root_lines))

    for language in LANGUAGES:
        language_dir = SECURE_CODE_ROOT / language
        ensure_dir(language_dir)
        index_lines = [
            f"# {language}",
            "",
            "> 自动生成修复主题索引",
            "",
            "- 语境: 授权攻防实验后的修复映射,不作为生产默认推荐模版。",
            "",
        ]
        for topic, description in TOPIC_DESCRIPTIONS.items():
            index_lines.append(f"- [{topic}]({_abs_repo_path('05-defense', 'secure-code', language, f'{topic}.md')}) - {description}")

            # The snippet pair is looked up per language, so every topic page of
            # one language shows the same vulnerable/safer example.
            bad, good = BAD_GOOD_SNIPPETS[language]
            lines = [
                f"# {topic}",
                "",
                "> `LAB ONLY` | 修复主题页",
                "",
                f"- 语言: `{language}`",
                f"- 主题: `{topic}`",
                f"- 说明: {description}",
                f"- 典型场景: {TOPIC_SCENARIOS.get(topic, '把实验问题还原为可修复的代码模式。')}",
                "",
                "## 脆弱示例",
                "",
                f"```{_code_fence(language)}",
                bad,
                "```",
                "",
                "## 更安全的写法",
                "",
                f"```{_code_fence(language)}",
                good,
                "```",
                "",
                "## 检查清单",
                "",
                "- 明确输入边界与不可信来源",
                "- 在服务端或可信封装层统一做校验/转义/约束",
                "- 对关键路径补充自动化测试和依赖升级策略",
                "",
                "## 相关系统",
                "",
            ]
            # Sorted so the list of related systems is stable across runs.
            for display_name in sorted(related.get(topic, [])):
                lines.append(f"- {display_name}")
            write_text(language_dir / f"{topic}.md", "\n".join(lines))
        # Per-language INDEX.md and README.md also share one body.
        write_text(language_dir / "INDEX.md", "\n".join(index_lines))
        write_text(language_dir / "README.md", "\n".join(index_lines))
|
||||
|
||||
|
||||
def _code_fence(language: str) -> str:
|
||||
mapping = {
|
||||
"javascript-typescript": "ts",
|
||||
"nodejs": "js",
|
||||
"java": "java",
|
||||
"php": "php",
|
||||
"python": "py",
|
||||
"ruby": "rb",
|
||||
"csharp": "cs",
|
||||
"go": "go",
|
||||
}
|
||||
return mapping.get(language, "")
|
||||
91
scripts/intel/route.py
普通文件
91
scripts/intel/route.py
普通文件
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from intel.models import AdvisoryRecord
|
||||
from intel.utils import slugify
|
||||
|
||||
|
||||
# secure-code topic -> lowercase substrings; a topic is attached to an advisory
# when any of its substrings occurs in the advisory's lowercased text.
# Matching is plain substring containment, so short entries such as "csp",
# "token" or "upload" also match inside longer words.
TOPIC_KEYWORDS = {
    "xss-output-encoding": ["xss", "cross-site scripting"],
    "dom-sink-hardening": ["dom xss", "innerhtml", "outerhtml"],
    "csp-trusted-types": ["trusted types", "content security policy", "csp"],
    "token-cookie-storage": ["cookie", "token", "session", "jwt"],
    "authz-server-side-recheck": ["authorization bypass", "auth bypass", "improper authorization", "access control"],
    "ssrf-url-validation": ["ssrf", "server-side request forgery"],
    "request-smuggling-boundary": ["request smuggling", "http desync"],
    "path-traversal-guard": ["path traversal", "directory traversal"],
    "file-upload-validation": ["file upload", "upload"],
    "plugin-extension-trust-policy": ["plugin", "extension", "module", "theme"],
    "dependency-upgrade-policy": ["dependency", "supply chain", "advisory", "package"],
    "proxy-trust-boundary": ["proxy", "middleware", "reverse proxy", "header trust"],
    "deserialization-safety": ["deserialization", "serialization"],
    "template-injection-guard": ["template injection", "ssti"],
}

# Lowercase substrings that, together with a critical/high severity, make an
# advisory eligible for a Markdown case page.
HIGH_VALUE_TERMS = [
    "rce",
    "remote code execution",
    "authorization bypass",
    "auth bypass",
    "known_exploited",
    "known exploited",
    "ssrf",
    "deserialization",
]
|
||||
|
||||
|
||||
def _pick_topics(system: Dict[str, Any], advisory: AdvisoryRecord) -> List[str]:
    """Return the secure-code topics for ``advisory``, deduplicated in order.

    The result starts with the system's own ``secure_code_topics`` and is
    followed by every ``TOPIC_KEYWORDS`` topic with a keyword that occurs in
    the lowercased title, summary or aliases of the advisory.
    """
    text_parts = [advisory.title, advisory.summary, " ".join(advisory.aliases)]
    haystack = " ".join(part for part in text_parts if part).lower()

    picked = list(system.get("secure_code_topics", []))
    picked.extend(
        topic
        for topic, keywords in TOPIC_KEYWORDS.items()
        if any(keyword in haystack for keyword in keywords)
    )
    # dict.fromkeys keeps the first occurrence of each topic, in order.
    return list(dict.fromkeys(picked))
|
||||
|
||||
|
||||
def _should_render(system: Dict[str, Any], advisory: AdvisoryRecord) -> bool:
|
||||
if advisory.status == "triage":
|
||||
return False
|
||||
policy = system.get("render_policy", {})
|
||||
if advisory.advisory_mode == "core" and policy.get("core_always_markdown", False):
|
||||
return True
|
||||
haystack = f"{advisory.title} {advisory.summary} {advisory.exploit_status}".lower()
|
||||
if advisory.exploit_status and advisory.exploit_status != "unknown":
|
||||
return True
|
||||
if advisory.cvss_score is not None and advisory.cvss_score >= 8.8:
|
||||
return True
|
||||
if advisory.severity in {"critical", "high"} and any(term in haystack for term in HIGH_VALUE_TERMS):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def route_advisories(source_map: Dict[str, Any], advisories: List[AdvisoryRecord]) -> List[AdvisoryRecord]:
    """Annotate each advisory with topics, render flag and case path.

    Advisories are mutated in place and returned in their original order.
    ``case_path`` is assigned only when the advisory is flagged for
    rendering; its file name is a slug of the system id plus the first CVE
    id, else the first GHSA id, else the title.

    Raises:
        KeyError: If an advisory references a system id that is not present
            in ``source_map["systems"]``.
    """
    systems_by_id = {entry["system_id"]: entry for entry in source_map["systems"]}
    routed: List[AdvisoryRecord] = []
    for advisory in advisories:
        system = systems_by_id[advisory.system_id]
        advisory.secure_code_topics = _pick_topics(system, advisory)
        advisory.render_markdown = _should_render(system, advisory)
        if advisory.render_markdown:
            if advisory.cve_ids:
                identifier = advisory.cve_ids[0]
            elif advisory.ghsa_ids:
                identifier = advisory.ghsa_ids[0]
            else:
                identifier = advisory.title
            slug_source = "-".join(part for part in (advisory.system_id, identifier) if part)
            slug = slugify(slug_source)
            advisory.case_path = str(Path(system["output_dir"]) / "cases" / f"{slug}.md")
        routed.append(advisory)
    return routed
|
||||
11
scripts/intel/run-hourly.sh
普通文件
11
scripts/intel/run-hourly.sh
普通文件
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Hourly job: runs the "hotlane" command of the intel pipeline and appends all
# output to a timestamped log file.
set -euo pipefail

# Repository root. Defaults to the original checkout location; set
# WEBSAFE_ROOT to run the job from a different checkout.
WEBSAFE_ROOT="${WEBSAFE_ROOT:-/Users/x/websafe}"

cd "$WEBSAFE_ROOT"
LOG_DIR="$WEBSAFE_ROOT/08-threat-intel/generated/logs"
mkdir -p "$LOG_DIR"
STAMP="$(date '+%Y%m%d-%H%M%S')"
# From here on, stdout and stderr both go to the log file.
exec >> "$LOG_DIR/hourly-$STAMP.log" 2>&1

echo "[hourly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 "$WEBSAFE_ROOT/scripts/intel/main.py" hotlane
|
||||
11
scripts/intel/run-nightly.sh
普通文件
11
scripts/intel/run-nightly.sh
普通文件
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Nightly job: runs "ingest --since last-success" of the intel pipeline and
# appends all output to a timestamped log file.
set -euo pipefail

# Repository root. Defaults to the original checkout location; set
# WEBSAFE_ROOT to run the job from a different checkout.
WEBSAFE_ROOT="${WEBSAFE_ROOT:-/Users/x/websafe}"

cd "$WEBSAFE_ROOT"
LOG_DIR="$WEBSAFE_ROOT/08-threat-intel/generated/logs"
mkdir -p "$LOG_DIR"
STAMP="$(date '+%Y%m%d-%H%M%S')"
# From here on, stdout and stderr both go to the log file.
exec >> "$LOG_DIR/nightly-$STAMP.log" 2>&1

echo "[nightly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 "$WEBSAFE_ROOT/scripts/intel/main.py" ingest --since last-success
|
||||
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Weekly job: runs the "reconcile" command of the intel pipeline and appends
# all output to a timestamped log file.
set -euo pipefail

# Repository root. Defaults to the original checkout location; set
# WEBSAFE_ROOT to run the job from a different checkout.
WEBSAFE_ROOT="${WEBSAFE_ROOT:-/Users/x/websafe}"

cd "$WEBSAFE_ROOT"
LOG_DIR="$WEBSAFE_ROOT/08-threat-intel/generated/logs"
mkdir -p "$LOG_DIR"
STAMP="$(date '+%Y%m%d-%H%M%S')"
# From here on, stdout and stderr both go to the log file.
exec >> "$LOG_DIR/weekly-$STAMP.log" 2>&1

echo "[weekly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 "$WEBSAFE_ROOT/scripts/intel/main.py" reconcile
|
||||
@@ -0,0 +1 @@
|
||||
"""Source adapters for advisory ingestion."""
|
||||
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Download the JSON feed at ``source["url"]`` and return matching entries.

    An entry of the feed's ``vulnerabilities`` list matches when any keyword
    occurs in its lowercased vendor, product, name or description text.
    Keywords come from ``source["keywords"]``, else
    ``system["kev_keywords"]``, else the system display name. Every match is
    reported as critical and ``known_exploited``.

    Raises:
        requests.HTTPError: If the feed responds with an error status.
    """
    feed_url = source["url"]
    response = requests.get(feed_url, headers={"User-Agent": "websafe-intel"}, timeout=30)
    response.raise_for_status()
    catalog = response.json()

    raw_keywords = source.get("keywords") or system.get("kev_keywords", []) or [system["display_name"]]
    needles = {keyword.lower() for keyword in raw_keywords}

    matches: List[Candidate] = []
    for entry in catalog.get("vulnerabilities", []):
        text_fields = (
            entry.get("vendorProject"),
            entry.get("product"),
            entry.get("vulnerabilityName"),
            entry.get("shortDescription"),
        )
        text = " ".join(value for value in text_fields if value).lower()
        if all(needle not in text for needle in needles):
            continue

        cve_id = entry.get("cveID")
        candidate = Candidate(
            system_id=system["system_id"],
            display_name=system["display_name"],
            category=system["category"],
            advisory_mode=source.get("advisory_mode", "core"),
            source_kind=source["kind"],
            source_name=source["name"],
            source_confidence=source["confidence"],
            source_url=feed_url,
            title=entry.get("vulnerabilityName") or cve_id or f"KEV advisory for {system['display_name']}",
            published_at=entry.get("dateAdded"),
            # NOTE(review): "dueDate" looks like a remediation deadline rather
            # than a modification time — confirm it is the intended value here.
            updated_at=entry.get("dueDate"),
            summary=entry.get("shortDescription") or "",
            severity="critical",
            exploit_status="known_exploited",
            aliases=unique([cve_id]),
            cve_ids=[cve_id] if cve_id else [],
            references=[feed_url],
            raw=entry,
        )
        matches.append(candidate)
    return matches
|
||||
@@ -0,0 +1,120 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
# Endpoint queried by fetch(); also the fallback source_url for advisories
# that carry no html_url.
API_URL = "https://api.github.com/advisories"
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Collect GitHub advisories that concern ``system``.

    An advisory is kept when one of its vulnerable packages matches an entry
    of ``system["package_names"]`` (same name and ecosystem, compared
    case-insensitively), or when one of ``system["ghsa_keywords"]`` occurs
    in its summary, description or ids. Version data is taken from the
    matched packages only.

    Result pages are followed through the ``Link: rel="next"`` response
    header until no further page is advertised or a page comes back empty.

    Args:
        system: System definition from the source map.
        source: Source definition; ``ecosystem`` (optional) narrows the query.

    Returns:
        One ``Candidate`` per retained advisory, in API order.

    Raises:
        requests.HTTPError: On a rate-limit response (with a hint about
            ``GITHUB_TOKEN``) or any other error status.
    """
    headers = {"Accept": "application/vnd.github+json", "User-Agent": "websafe-intel"}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    # lowercased package name -> lowercased ecosystem
    packages = {
        item["name"].lower(): item["ecosystem"].lower()
        for item in system.get("package_names", [])
        if item.get("name") and item.get("ecosystem")
    }
    keyword_set = {value.lower() for value in system.get("ghsa_keywords", [])}
    candidates: List[Candidate] = []

    # The endpoint paginates with cursors, not page numbers: a "page" query
    # parameter is ignored, so incrementing it re-reads the first page forever.
    # Follow the "next" link advertised by each response instead.
    url = API_URL
    params = {"per_page": 100, "ecosystem": source.get("ecosystem")}
    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code == 403 and "rate limit" in response.text.lower():
            raise requests.HTTPError("GitHub advisory rate limit exceeded; set GITHUB_TOKEN for higher quota", response=response)
        response.raise_for_status()
        advisories = response.json()
        if not advisories:
            break

        for advisory in advisories:
            matched_vulns = []
            for vuln in advisory.get("vulnerabilities", []):
                package = vuln.get("package") or {}
                package_name = (package.get("name") or "").lower()
                ecosystem = (package.get("ecosystem") or "").lower()
                if package_name in packages and packages[package_name] == ecosystem:
                    matched_vulns.append(vuln)

            haystack = " ".join(
                filter(
                    None,
                    [
                        advisory.get("summary"),
                        advisory.get("description"),
                        advisory.get("ghsa_id"),
                        advisory.get("cve_id"),
                    ],
                )
            ).lower()
            # Without a package match the advisory needs a keyword hit.
            if not matched_vulns and not any(keyword in haystack for keyword in keyword_set):
                continue

            affected_versions = []
            fixed_versions = []
            package_name = None
            for vuln in matched_vulns:
                if vuln.get("vulnerable_version_range"):
                    affected_versions.append(vuln["vulnerable_version_range"])
                patched = vuln.get("first_patched_version") or {}
                if patched.get("identifier"):
                    fixed_versions.append(patched["identifier"])
                if not package_name and vuln.get("package"):
                    package_name = vuln["package"].get("name")

            # "identifiers" entries are {"type": ..., "value": ...} objects;
            # only the value string belongs in the alias list. Plain strings
            # are passed through unchanged.
            identifier_values = []
            for entry in advisory.get("identifiers") or []:
                value = entry.get("value") if isinstance(entry, dict) else entry
                if value:
                    identifier_values.append(value)

            aliases = unique(
                [
                    advisory.get("ghsa_id"),
                    advisory.get("cve_id"),
                    *identifier_values,
                ]
            )
            cve_ids = [advisory["cve_id"]] if advisory.get("cve_id") else []
            ghsa_ids = [advisory["ghsa_id"]] if advisory.get("ghsa_id") else []

            candidates.append(
                Candidate(
                    system_id=system["system_id"],
                    display_name=system["display_name"],
                    category=system["category"],
                    advisory_mode=source.get("advisory_mode", "core"),
                    source_kind=source["kind"],
                    source_name=source["name"],
                    source_confidence=source["confidence"],
                    source_url=advisory.get("html_url") or API_URL,
                    title=advisory.get("summary") or advisory.get("ghsa_id") or "GitHub advisory",
                    published_at=advisory.get("published_at"),
                    updated_at=advisory.get("updated_at"),
                    summary=advisory.get("description") or "",
                    severity=(advisory.get("severity") or "unknown").lower(),
                    aliases=aliases,
                    cve_ids=cve_ids,
                    ghsa_ids=ghsa_ids,
                    affected_versions=unique(affected_versions),
                    fixed_versions=unique(fixed_versions),
                    package_name=package_name,
                    references=[advisory.get("html_url")] if advisory.get("html_url") else [],
                    raw=advisory,
                )
            )

        # The next-page URL already carries the full query string, so the
        # initial parameters must not be sent again.
        url = (response.links.get("next") or {}).get("url")
        params = None

    return candidates
|
||||
@@ -0,0 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from html import unescape
|
||||
from typing import Any, Dict, List
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
# Matches an <a ...href="...">...</a> element; group 1 is the href value,
# group 2 the inner markup (case-insensitive, may span lines).
ANCHOR_RE = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
# Matches any single HTML tag; used to strip markup from link text.
TAG_RE = re.compile(r"<[^>]+>")
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Scrape the page at ``source["url"]`` and turn its links into candidates.

    Each anchor with non-empty text becomes a candidate titled with that
    text, provided its absolute URL has not been seen yet and, when
    ``source["keywords"]`` is given, a keyword occurs in the lowercased
    "title URL" string. Collection stops at ``source["max_items"]``
    candidates (default 50).

    Raises:
        requests.HTTPError: If the page responds with an error status.
    """
    page_url = source["url"]
    response = requests.get(page_url, headers={"User-Agent": "websafe-intel"}, timeout=30)
    response.raise_for_status()
    markup = response.text
    needles = {keyword.lower() for keyword in source.get("keywords", [])}

    found: List[Candidate] = []
    visited = set()
    for match in ANCHOR_RE.finditer(markup):
        href, inner = match.group(1), match.group(2)
        title = unescape(TAG_RE.sub(" ", inner)).strip()
        if not title:
            continue
        link = urljoin(page_url, href)
        searchable = f"{title} {link}".lower()
        if needles and all(needle not in searchable for needle in needles):
            continue
        if link in visited:
            continue
        visited.add(link)
        found.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=link,
                title=title,
                summary="",
                severity="unknown",
                references=unique([link]),
                raw={"href": link, "title": title},
            )
        )
        if len(found) >= source.get("max_items", 50):
            break
    return found
|
||||
68
scripts/intel/sources/nvd_api.py
普通文件
68
scripts/intel/sources/nvd_api.py
普通文件
@@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
# Endpoint queried by fetch(); also the fallback source_url for entries
# without references.
API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Run one keyword search against ``API_URL`` and return a candidate per hit.

    The search term is ``source["keyword"]`` or the system display name, and
    the page size is ``source["results_per_page"]`` (default 50). Only that
    single page is read. Severity and score come from the first metric entry
    of the first non-empty set among CVSS v3.1, v3.0 and v2.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    query = {
        "keywordSearch": source.get("keyword") or system["display_name"],
        "resultsPerPage": source.get("results_per_page", 50),
    }
    request_headers = {"User-Agent": "websafe-intel"}
    api_key = os.environ.get("NVD_API_KEY")
    if api_key:
        request_headers["apiKey"] = api_key

    response = requests.get(API_URL, headers=request_headers, params=query, timeout=30)
    response.raise_for_status()
    payload = response.json()

    results: List[Candidate] = []
    for record in payload.get("vulnerabilities", []):
        cve = record.get("cve", {})
        cve_id = cve.get("id")

        english = [d.get("value") for d in cve.get("descriptions", []) if d.get("lang") == "en"]
        description = english[0] if english else ""

        metrics = cve.get("metrics", {})
        severity = "unknown"
        cvss_score = None
        metric_sets = (metrics.get(key, []) for key in ("cvssMetricV31", "cvssMetricV30", "cvssMetricV2"))
        chosen = next((entries for entries in metric_sets if entries), None)
        if chosen:
            primary = chosen[0]
            cvss_data = primary.get("cvssData", {})
            # baseSeverity may sit on the metric entry or inside cvssData.
            severity = (primary.get("baseSeverity") or cvss_data.get("baseSeverity") or "unknown").lower()
            cvss_score = cvss_data.get("baseScore")

        reference_urls = [ref.get("url") for ref in cve.get("references", []) if ref.get("url")]
        results.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=reference_urls[0] if reference_urls else API_URL,
                title=cve_id or f"NVD advisory for {system['display_name']}",
                published_at=cve.get("published"),
                updated_at=cve.get("lastModified"),
                summary=description or "",
                severity=severity,
                cvss_score=cvss_score,
                aliases=unique([cve_id]),
                cve_ids=[cve_id] if cve_id else [],
                references=reference_urls,
                raw=record,
            )
        )
    return results
|
||||
154
scripts/intel/sources/osv_api.py
普通文件
154
scripts/intel/sources/osv_api.py
普通文件
@@ -0,0 +1,154 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
# OSV endpoint that accepts a batch of package queries in one POST.
QUERY_BATCH_URL = "https://api.osv.dev/v1/querybatch"
# OSV endpoint for a single vulnerability record; formatted with the OSV id.
DETAIL_URL = "https://api.osv.dev/v1/vulns/{vuln_id}"
# Matches a CVSS 3.0/3.1 base vector that is preceded by a "/".
# NOTE(review): not referenced by any function in this module — confirm it is still needed.
CVSS_SCORE_RE = re.compile(r"/CVSS:3\.[01]/AV:[A-Z]/AC:[A-Z]/PR:[A-Z]/UI:[A-Z]/S:[A-Z]/C:[A-Z]/I:[A-Z]/A:[A-Z]")
# Captures the first integer or decimal number found in a string.
NUMERIC_SCORE_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)")
|
||||
|
||||
|
||||
def _fetch_detail(session: requests.Session, vuln_id: str) -> Dict[str, Any]:
    """Download the full OSV record for ``vuln_id`` and return the decoded JSON.

    The request goes through ``session`` with a 30 second timeout; an error
    status is raised by ``raise_for_status``.
    """
    url = DETAIL_URL.format(vuln_id=vuln_id)
    reply = session.get(url, headers={"User-Agent": "websafe-intel"}, timeout=30)
    reply.raise_for_status()
    return reply.json()
|
||||
|
||||
|
||||
def _fixed_versions(vuln: Dict[str, Any]) -> List[str]:
    """Return every non-empty ``fixed`` event value found in ``vuln``.

    Walks ``affected -> ranges -> events`` and passes the collected values
    through ``unique`` so each version appears once, in first-seen order.
    """
    return unique(
        event["fixed"]
        for entry in vuln.get("affected", [])
        for span in entry.get("ranges", [])
        for event in span.get("events", [])
        if event.get("fixed")
    )
|
||||
|
||||
|
||||
def _affected_versions(vuln: Dict[str, Any]) -> List[str]:
    """Summarise which versions ``vuln`` affects.

    The result combines, de-duplicated via ``unique``:
    * up to 20 explicit versions from each ``affected`` entry, and
    * one text summary per range, built from the first non-empty
      ``introduced`` / ``last_affected`` / ``fixed`` / ``limit`` event values.
    """
    # (event key, output template) in the order the parts are rendered.
    templates = (
        ("introduced", "introduced={}"),
        ("last_affected", "last_affected={}"),
        ("fixed", "fixed<{}"),
        ("limit", "limit<{}"),
    )
    listed = []
    described = []
    for entry in vuln.get("affected", []):
        listed += entry.get("versions", [])[:20]
        for span in entry.get("ranges", []):
            events = span.get("events", [])
            parts = []
            for key, template in templates:
                # Only the first truthy value of each kind is kept.
                value = next((ev.get(key) for ev in events if ev.get(key)), None)
                if value:
                    parts.append(template.format(value))
            if parts:
                described.append(", ".join(parts))
    return unique(listed + described)
|
||||
|
||||
|
||||
# CVSS v3.x metric weights (FIRST CVSS v3.1 specification, section 7.4).
_CVSS3_AV = {"N": 0.85, "A": 0.62, "L": 0.55, "P": 0.2}
_CVSS3_AC = {"L": 0.77, "H": 0.44}
_CVSS3_UI = {"N": 0.85, "R": 0.62}
_CVSS3_CIA = {"H": 0.56, "L": 0.22, "N": 0.0}
_CVSS3_PR_UNCHANGED = {"N": 0.85, "L": 0.62, "H": 0.27}
_CVSS3_PR_CHANGED = {"N": 0.85, "L": 0.68, "H": 0.5}


def _cvss3_base_score(vector: str) -> float | None:
    """Compute the base score of a ``CVSS:3.x/...`` vector, or None if it is malformed."""
    metrics = dict(part.split(":", 1) for part in vector.split("/") if ":" in part)
    scope = metrics.get("S")
    if scope not in ("U", "C"):
        return None
    changed = scope == "C"
    try:
        privileges = (_CVSS3_PR_CHANGED if changed else _CVSS3_PR_UNCHANGED)[metrics["PR"]]
        exploitability = (
            8.22 * _CVSS3_AV[metrics["AV"]] * _CVSS3_AC[metrics["AC"]] * privileges * _CVSS3_UI[metrics["UI"]]
        )
        iss = 1 - (
            (1 - _CVSS3_CIA[metrics["C"]]) * (1 - _CVSS3_CIA[metrics["I"]]) * (1 - _CVSS3_CIA[metrics["A"]])
        )
    except KeyError:
        # A required metric is missing or carries an unknown value.
        return None
    if changed:
        impact = 7.52 * (iss - 0.029) - 3.25 * (iss - 0.02) ** 15
        raw = 1.08 * (impact + exploitability)
    else:
        impact = 6.42 * iss
        raw = impact + exploitability
    if impact <= 0:
        return 0.0
    # Spec "Roundup": smallest one-decimal number >= the input, computed on
    # integers to avoid floating-point artefacts.
    scaled = round(min(raw, 10.0) * 100000)
    if scaled % 10000 == 0:
        return scaled / 100000.0
    return (scaled // 10000 + 1) / 10.0


def _severity(vuln: Dict[str, Any]) -> tuple[str, float | None]:
    """Derive ``(severity label, numeric score)`` from an OSV record.

    The first ``severity`` entry that yields a score wins.  A ``CVSS:3.x``
    vector is converted to its base score; a plain number in the 0-10 range is
    taken as-is.  Other formats are skipped.  Previously the first number in
    the string was used, which for a vector is the CVSS *version* ("3.1"), so
    every vector-scored advisory was reported as ``low``.

    Returns ``("unknown", None)`` when no entry yields a score; otherwise the
    label is critical (>= 9.0), high (>= 7.0), medium (>= 4.0) or low.
    """
    best_score = None
    for sev in vuln.get("severity") or []:
        raw = sev.get("score")
        if not isinstance(raw, str):
            continue
        raw = raw.strip()
        if raw.startswith("CVSS:3."):
            candidate = _cvss3_base_score(raw)
        else:
            try:
                candidate = float(raw)
            except ValueError:
                candidate = None
            # Rejects nan/inf and anything outside the CVSS score range.
            if candidate is not None and not 0.0 <= candidate <= 10.0:
                candidate = None
        if candidate is not None:
            best_score = candidate
            break
    if best_score is None:
        return "unknown", None
    if best_score >= 9.0:
        return "critical", best_score
    if best_score >= 7.0:
        return "high", best_score
    if best_score >= 4.0:
        return "medium", best_score
    return "low", best_score
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Query OSV for every package configured on ``system`` and build candidates.

    One batch query is sent with a ``{name, ecosystem}`` entry per item in
    ``system["package_names"]``; each vulnerability id in the reply is then
    expanded through the detail endpoint (cached per id within this call).

    Returns an empty list when the system declares no packages.  HTTP failures
    surface through ``raise_for_status``.
    """
    packages = system.get("package_names", [])
    if not packages:
        return []

    queries = [{"package": {"name": pkg["name"], "ecosystem": pkg["ecosystem"]}} for pkg in packages]
    detail_cache: Dict[str, Dict[str, Any]] = {}
    candidates: List[Candidate] = []

    # The context manager releases the session's pooled connections even when
    # a request raises part-way through.
    with requests.Session() as session:
        response = session.post(
            QUERY_BATCH_URL,
            json={"queries": queries},
            headers={"User-Agent": "websafe-intel"},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()

        # Results are positional: the i-th result answers the i-th query.
        for package, result in zip(packages, payload.get("results", [])):
            for summary in result.get("vulns", []):
                vuln_id = summary.get("id")
                if not vuln_id:
                    continue
                if vuln_id not in detail_cache:
                    detail_cache[vuln_id] = _fetch_detail(session, vuln_id)
                vuln = detail_cache[vuln_id]

                # "aliases" may be absent or null; treat both as empty.
                aliases = unique((vuln.get("aliases") or []) + [vuln.get("id")])
                refs = [ref.get("url") for ref in vuln.get("references", []) if ref.get("url")]
                severity, cvss_score = _severity(vuln)

                package_name = package["name"]
                if not package_name:
                    # Fall back to the first package name the record itself lists.
                    for affected in vuln.get("affected", []):
                        pkg = affected.get("package") or {}
                        if pkg.get("name"):
                            package_name = pkg["name"]
                            break

                candidates.append(
                    Candidate(
                        system_id=system["system_id"],
                        display_name=system["display_name"],
                        category=system["category"],
                        advisory_mode=source.get("advisory_mode", "core"),
                        source_kind=source["kind"],
                        source_name=source["name"],
                        source_confidence=source["confidence"],
                        source_url=refs[0] if refs else DETAIL_URL.format(vuln_id=vuln_id),
                        title=vuln.get("summary") or vuln.get("id") or f"OSV advisory for {package['name']}",
                        published_at=vuln.get("published"),
                        updated_at=vuln.get("modified"),
                        summary=vuln.get("details") or "",
                        severity=severity,
                        cvss_score=cvss_score,
                        aliases=aliases,
                        cve_ids=[item for item in aliases if item and item.startswith("CVE-")],
                        ghsa_ids=[item for item in aliases if item and item.startswith("GHSA-")],
                        osv_ids=[vuln.get("id")] if vuln.get("id") else [],
                        affected_versions=_affected_versions(vuln),
                        fixed_versions=_fixed_versions(vuln),
                        package_name=package_name,
                        references=refs,
                        raw=vuln,
                    )
                )
    return candidates
|
||||
@@ -0,0 +1,51 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
|
||||
|
||||
def _text(node: ET.Element, name: str) -> str:
    """Return the stripped text of the first ``name`` child of ``node``.

    An empty string is returned when the child is missing or has no text.
    """
    child = node.find(name)
    if child is None or not child.text:
        return ""
    return child.text.strip()
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Download the RSS feed at ``source["url"]`` and turn its items into candidates.

    Only the first ``source["max_items"]`` (default 50) ``<item>`` elements are
    considered.  When ``source["keywords"]`` is non-empty, an item is kept only
    if at least one keyword occurs (case-insensitively) in its title or
    description.  Severity is always reported as "unknown".
    """
    feed_url = source["url"]
    reply = requests.get(feed_url, headers={"User-Agent": "websafe-intel"}, timeout=30)
    reply.raise_for_status()
    tree = ET.fromstring(reply.content)

    wanted = {kw.lower() for kw in source.get("keywords", [])}
    limit = source.get("max_items", 50)
    found: List[Candidate] = []
    for entry in tree.findall(".//item")[:limit]:
        title = _text(entry, "title")
        link = _text(entry, "link") or feed_url
        description = _text(entry, "description")
        if wanted:
            blob = f"{title} {description}".lower()
            if all(kw not in blob for kw in wanted):
                continue
        # The feed's pubDate is used for both the published and updated stamps.
        stamp = _text(entry, "pubDate")
        found.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=link,
                title=title or f"RSS entry for {system['display_name']}",
                published_at=stamp,
                updated_at=stamp,
                summary=description,
                severity="unknown",
                references=[link],
                raw={"title": title, "link": link},
            )
        )
    return found
|
||||
57
scripts/intel/sources/runner.py
普通文件
57
scripts/intel/sources/runner.py
普通文件
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import parse_dt
|
||||
|
||||
from . import cisa_kev, github_global, html_links, nvd_api, osv_api, rss_feed
|
||||
|
||||
|
||||
# Dispatch table: maps a source's "kind" string to the fetch callable of the
# module that handles it.  Each callable is invoked as handler(system, source).
HANDLERS = {
    "ghsa-global": github_global.fetch,
    "osv-batch": osv_api.fetch,
    "kev-json": cisa_kev.fetch,
    "nvd-search": nvd_api.fetch,
    "rss-feed": rss_feed.fetch,
    "html-links": html_links.fetch,
}
|
||||
|
||||
|
||||
def _passes_since(candidate: Candidate, since_dt: Optional[datetime], include_undated: bool) -> bool:
    """Decide whether ``candidate`` is recent enough to keep.

    With no ``since_dt`` everything passes.  Otherwise the later of the
    candidate's parseable ``updated_at`` / ``published_at`` stamps must be at or
    after ``since_dt``; a candidate with no parseable stamp passes only when
    ``include_undated`` is true.
    """
    if since_dt is None:
        return True
    stamps = [
        moment
        for moment in (parse_dt(candidate.updated_at), parse_dt(candidate.published_at))
        if moment is not None
    ]
    if stamps:
        return max(stamps) >= since_dt
    return include_undated
|
||||
|
||||
|
||||
def collect_candidates(
    source_map: Dict[str, Any],
    since_dt: Optional[datetime] = None,
    tier: Optional[str] = None,
    include_undated: bool = False,
) -> Tuple[List[Candidate], List[str]]:
    """Run every configured source handler and gather the resulting candidates.

    Systems are skipped when ``tier`` is given and does not match.  For each
    source in the official, ecosystem and research buckets the handler for its
    ``kind`` is called; candidates are kept if they pass ``_passes_since``.

    Returns ``(candidates, failures)`` where ``failures`` holds one message per
    unsupported source kind or per handler that raised.
    """
    buckets = ("official_sources", "ecosystem_sources", "research_sources")
    collected: List[Candidate] = []
    problems: List[str] = []

    for system in source_map["systems"]:
        if tier and system.get("tier") != tier:
            continue
        for bucket in buckets:
            for source in system.get(bucket, []):
                fetcher = HANDLERS.get(source["kind"])
                if fetcher is None:
                    problems.append(f"Unsupported source kind {source['kind']} for {system['system_id']}")
                    continue
                # One failing source must not abort the whole run; it is
                # recorded by exception class name and the loop continues.
                try:
                    for candidate in fetcher(system, source):
                        if _passes_since(candidate, since_dt, include_undated):
                            collected.append(candidate)
                except Exception as exc:
                    problems.append(f"{system['system_id']}::{source['name']}::{exc.__class__.__name__}")
    return collected, problems
|
||||
150
scripts/intel/utils.py
普通文件
150
scripts/intel/utils.py
普通文件
@@ -0,0 +1,150 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
|
||||
|
||||
UTC = timezone.utc


def now_utc() -> datetime:
    """Return the current moment as a timezone-aware datetime in UTC."""
    return datetime.now(UTC)


def isoformat(dt: datetime) -> str:
    """Render ``dt`` converted to UTC as an ISO-8601 string with whole seconds."""
    in_utc = dt.astimezone(UTC)
    # timespec="seconds" drops the sub-second part from the output.
    return in_utc.isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def parse_dt(value: Optional[str]) -> Optional[datetime]:
    """Parse a timestamp string into a timezone-aware datetime.

    Accepted inputs, tried in order:
    1. ISO-8601 (a trailing ``Z`` is read as ``+00:00``),
    2. ``"%a, %d %b %Y %H:%M:%S %z"``, ``"%Y-%m-%d"``, ``"%Y-%m-%d %H:%M:%S"``,
    3. RFC 2822 / RFC 822 dates as used by RSS ``pubDate``, including named
       zones such as ``GMT`` that ``%z`` cannot parse.

    A result without timezone information is assumed to be UTC.  Returns None
    for empty, non-string or unparseable input.
    """
    if not value or not isinstance(value, str):
        return None
    original = value.strip()
    text = original[:-1] + "+00:00" if original.endswith("Z") else original

    parsed = None
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        for fmt in ("%a, %d %b %Y %H:%M:%S %z", "%Y-%m-%d", "%Y-%m-%d %H:%M:%S"):
            try:
                parsed = datetime.strptime(text, fmt)
                break
            except ValueError:
                continue

    if parsed is None:
        # Local import keeps this block self-contained.
        from email.utils import parsedate_to_datetime

        try:
            parsed = parsedate_to_datetime(original)
        except (TypeError, ValueError, IndexError):
            # Older Python versions raise TypeError for unparseable input.
            return None

    # timezone.utc is the same object as this module's UTC constant.
    return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def parse_since(value: str, default_days: int = 30) -> datetime:
    """Turn a ``--since`` argument into a UTC datetime.

    * empty / missing  -> now minus ``default_days`` days
    * ``"<N>d"``       -> now minus N days
    * anything else    -> parsed with ``parse_dt`` and converted to UTC

    Raises ``ValueError`` when the value matches none of these forms.
    """
    text = (value or "").strip()
    if not text:
        return now_utc() - timedelta(days=default_days)

    relative = re.fullmatch(r"(\d+)d", text)
    if relative:
        return now_utc() - timedelta(days=int(relative.group(1)))

    moment = parse_dt(text)
    if not moment:
        raise ValueError(f"Unsupported --since value: {text}")
    if moment.tzinfo is None:
        return moment.replace(tzinfo=UTC)
    return moment.astimezone(UTC)
|
||||
|
||||
|
||||
def slugify(value: str) -> str:
    """Reduce ``value`` to a lowercase, dash-separated slug.

    Every run of characters outside ``a-z0-9`` becomes a single ``-`` and
    leading/trailing dashes are removed.  ``"item"`` is returned when nothing
    is left.
    """
    lowered = value.lower().strip()
    # One substitution already collapses consecutive separators into one dash.
    slug = re.sub(r"[^a-z0-9]+", "-", lowered).strip("-")
    return slug if slug else "item"
|
||||
|
||||
|
||||
def short_hash(*parts: str) -> str:
    """Return the first 10 hex characters of the SHA-1 of ``parts`` joined by ``::``."""
    joined = "::".join(parts)
    hexdigest = hashlib.sha1(joined.encode("utf-8")).hexdigest()
    return hexdigest[:10]
|
||||
|
||||
|
||||
def ensure_dir(path: Path) -> None:
    """Create ``path`` and any missing parents; do nothing if it already exists."""
    path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
|
||||
def read_json(path: Path, default: Any = None) -> Any:
    """Load and return the JSON document at ``path``, or ``default`` if the file is absent."""
    if path.exists():
        with path.open("r", encoding="utf-8") as stream:
            return json.load(stream)
    return default
|
||||
|
||||
|
||||
def write_json(path: Path, data: Any) -> None:
    """Serialise ``data`` to ``path`` as indented, ASCII-escaped JSON.

    The parent directory is created first and the file ends with a newline.
    """
    ensure_dir(path.parent)
    with path.open("w", encoding="utf-8") as stream:
        json.dump(data, stream, sort_keys=False, ensure_ascii=True, indent=2)
        stream.write("\n")
|
||||
|
||||
|
||||
def write_text(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` with trailing whitespace replaced by one newline.

    The parent directory is created first.
    """
    ensure_dir(path.parent)
    normalised = content.rstrip() + "\n"
    with path.open("w", encoding="utf-8") as stream:
        stream.write(normalised)
|
||||
|
||||
|
||||
def run(cmd: List[str], cwd: Optional[Path] = None, check: bool = True) -> subprocess.CompletedProcess:
    """Run ``cmd`` and return the completed process with captured text output.

    ``cwd`` optionally sets the working directory.  With ``check`` true a
    non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    workdir = None if not cwd else str(cwd)
    return subprocess.run(cmd, cwd=workdir, check=check, capture_output=True, text=True)
|
||||
|
||||
|
||||
def load_all_json(path: Path) -> List[Dict[str, Any]]:
    """Load every ``*.json`` file directly inside ``path``, in sorted order.

    Only files whose top-level value is a JSON object are returned.  A missing
    directory yields an empty list.
    """
    if not path.exists():
        return []
    documents = (read_json(entry, default=None) for entry in sorted(path.glob("*.json")))
    return [document for document in documents if isinstance(document, dict)]
|
||||
|
||||
|
||||
def unique(values: Iterable[str]) -> List[str]:
    """Return the truthy items of ``values`` without duplicates, in first-seen order."""
    # dict keys keep insertion order, so this de-duplicates while preserving order.
    return list(dict.fromkeys(item for item in values if item))
|
||||
|
||||
|
||||
def severity_rank(value: Optional[str]) -> int:
    """Map a severity label (case-insensitive) to a sortable rank from 0 to 5.

    ``None``, the empty string, ``"unknown"`` and unrecognised labels rank 0.
    """
    label = (value or "").lower()
    tiers = (
        (5, ("critical",)),
        (4, ("high", "important")),
        (3, ("medium", "moderate")),
        (2, ("low",)),
        (1, ("info",)),
    )
    for rank, names in tiers:
        if label in names:
            return rank
    return 0
|
||||
|
||||
|
||||
def best_severity(values: Iterable[Optional[str]]) -> str:
    """Return the highest-ranked truthy label in ``values`` per ``severity_rank``.

    Ties go to the label that appears first; ``"unknown"`` is returned when no
    truthy label is present.
    """
    # Rank everything first, as a full sort would, then pick among truthy labels.
    ranked = [(severity_rank(label), label) for label in values]
    usable = [pair for pair in ranked if pair[1]]
    if not usable:
        return "unknown"
    # max() returns the first maximal element, matching a stable descending sort.
    return max(usable, key=lambda pair: pair[0])[1]
|
||||
97
scripts/intel/validators.py
普通文件
97
scripts/intel/validators.py
普通文件
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from intel.config import FRAMEWORK_ROOT, GENERATED_DIR, REGISTRY_ROOT, ROOT, SECURE_CODE_ROOT, SOURCE_MAP_PATH, SYSTEMS_DIR
|
||||
from intel.render import LANGUAGES, TOPIC_DESCRIPTIONS
|
||||
from intel.utils import load_all_json
|
||||
|
||||
|
||||
# Keys that every advisory JSON document in the registry must contain.
REQUIRED_REGISTRY_FIELDS = {
    "canonical_id",
    "system_id",
    "title",
    "severity",
    "source_confidence",
    "status",
}

# Keys that every entry of the source map's "systems" list must contain.
REQUIRED_SYSTEM_FIELDS = {
    "system_id",
    "display_name",
    "category",
    "tier",
    "advisory_modes",
    "official_sources",
    "ecosystem_sources",
    "research_sources",
    "output_dir",
    "secure_code_topics",
    "render_policy",
}
|
||||
|
||||
|
||||
def validate(source_map: Dict[str, Any]) -> List[str]:
    """Check the source map and the generated knowledge-base layout.

    Verifies, in order: the source-map file exists; every system has the
    required fields, a unique id, a usable ``output_dir`` and its README, INDEX
    and registry summary; the framework and group READMEs exist; registry
    advisories carry the required fields; generated artifacts exist; and the
    secure-code tree has a README, INDEX and topic file per language.

    Returns a list of human-readable error messages (empty when all is well).
    A system lacking ``system_id`` or ``output_dir`` is reported and skipped
    rather than raising ``KeyError``.
    """
    errors: List[str] = []
    if not SOURCE_MAP_PATH.exists():
        errors.append("source-map.yaml is missing")

    systems = source_map.get("systems", [])
    ids = set()
    groups = set()
    for system in systems:
        missing = REQUIRED_SYSTEM_FIELDS - set(system.keys())
        if missing:
            errors.append(f"system missing required fields: {system.get('system_id', 'unknown')} -> {sorted(missing)}")

        if "system_id" in system:
            system_id = system["system_id"]
            if system_id in ids:
                errors.append(f"duplicate system_id: {system_id}")
            ids.add(system_id)

        # The path checks below need both keys; their absence was already
        # reported above, so skip instead of crashing.
        if "system_id" not in system or "output_dir" not in system:
            continue

        output_dir = Path(system["output_dir"])
        if len(output_dir.parts) < 3:
            errors.append(f"invalid output_dir for {system_id}: {system['output_dir']}")
            continue
        # The second path component is used as the group name.
        groups.add(output_dir.parts[1])
        system_root = ROOT / output_dir
        if not (system_root / "README.md").exists():
            errors.append(f"system README missing: {system_root / 'README.md'}")
        if not (system_root / "INDEX.md").exists():
            errors.append(f"system INDEX missing: {system_root / 'INDEX.md'}")
        if not (SYSTEMS_DIR / f"{system_id}.json").exists():
            errors.append(f"system registry summary missing: {SYSTEMS_DIR / f'{system_id}.json'}")

    if not (FRAMEWORK_ROOT / "README.md").exists():
        errors.append(f"framework root README missing: {FRAMEWORK_ROOT / 'README.md'}")
    for group in groups:
        if not (FRAMEWORK_ROOT / group / "README.md").exists():
            errors.append(f"group README missing: {FRAMEWORK_ROOT / group / 'README.md'}")

    for item in load_all_json(REGISTRY_ROOT / "advisories"):
        missing = REQUIRED_REGISTRY_FIELDS - set(item.keys())
        if missing:
            errors.append(f"registry advisory missing fields: {item.get('canonical_id', 'unknown')} -> {sorted(missing)}")

    for path in [
        GENERATED_DIR / "coverage-matrix.md",
        GENERATED_DIR / "latest-ingest.md",
        GENERATED_DIR / "run-summary.json",
        ROOT / "08-threat-intel" / "registry" / "source-confidence.md",
    ]:
        if not path.exists():
            errors.append(f"generated artifact missing: {path}")

    if not (SECURE_CODE_ROOT / "README.md").exists():
        errors.append(f"secure-code README missing: {SECURE_CODE_ROOT / 'README.md'}")

    for language in LANGUAGES:
        language_dir = SECURE_CODE_ROOT / language
        if not (language_dir / "README.md").exists():
            errors.append(f"language README missing: {language_dir / 'README.md'}")
        if not (language_dir / "INDEX.md").exists():
            errors.append(f"language INDEX missing: {language_dir / 'INDEX.md'}")
        for topic in TOPIC_DESCRIPTIONS:
            if not (language_dir / f"{topic}.md").exists():
                errors.append(f"secure-code topic missing: {language_dir / f'{topic}.md'}")

    return errors
|
||||
@@ -7,15 +7,17 @@
|
||||
# ./sync-gitea.sh --commit # 仅提交
|
||||
# ./sync-gitea.sh --push # 仅推送
|
||||
|
||||
set -e
|
||||
set -euo pipefail
|
||||
|
||||
# 配置
|
||||
REPO_DIR="/Users/x/websafe"
|
||||
GITEA_URL="https://git.hk.hao.work"
|
||||
REPO_NAME="websafe-kb"
|
||||
GITEA_TOKEN="267bc2e8b189b8fb6daf56e41a9e5ad47d543968"
|
||||
GIT_USER="hao"
|
||||
GIT_EMAIL="hao@users.noreply.git.hk.hao.work"
|
||||
GITEA_API="${GITEA_URL}/api/v1"
|
||||
REPO_NAME="${REPO_NAME:-websafe-kb}"
|
||||
REPO_DESC="${REPO_DESC:-授权攻防实验与研究知识库}"
|
||||
GITEA_TOKEN="${GITEA_TOKEN:-}"
|
||||
GIT_USER="${GIT_USER:-hao}"
|
||||
GIT_EMAIL="${GIT_EMAIL:-hao@users.noreply.git.hk.hao.work}"
|
||||
|
||||
cd "$REPO_DIR"
|
||||
|
||||
@@ -42,6 +44,39 @@ log_error() {
|
||||
echo -e "${RED}[ERROR]${END} $1"
|
||||
}
|
||||
|
||||
# Print the Gitea REST API URL of the configured repository.
repo_api_url() {
    printf '%s\n' "${GITEA_API}/repos/${GIT_USER}/${REPO_NAME}"
}
|
||||
|
||||
# Print the HTTPS clone/push URL of the configured repository.
repo_git_url() {
    printf '%s\n' "${GITEA_URL}/${GIT_USER}/${REPO_NAME}.git"
}
|
||||
|
||||
# Make sure the remote repository exists on Gitea, creating it when possible.
# Returns 0 when the repository already exists or was created, and 1 when it
# is missing and no GITEA_TOKEN is available to create it.
# NOTE(review): any failure of the lookup request (not only "not found") is
# treated as "repository missing" — confirm that is intended.
ensure_remote_repo() {
    # The Authorization header is only added when a token is set.
    if curl -fsS ${GITEA_TOKEN:+-H} ${GITEA_TOKEN:+"Authorization: token ${GITEA_TOKEN}"} "$(repo_api_url)" >/dev/null 2>&1; then
        log_info "远程仓库已存在: ${GIT_USER}/${REPO_NAME}"
        return 0
    fi

    if [ -z "$GITEA_TOKEN" ]; then
        log_error "远程仓库不存在,且未提供 GITEA_TOKEN,无法自动创建"
        return 1
    fi

    log_info "创建远程仓库: ${GIT_USER}/${REPO_NAME}"
    # Escape backslashes and double quotes so a name or description containing
    # them cannot break the JSON request body.
    local name=$REPO_NAME
    local desc=$REPO_DESC
    local payload
    name=${name//\\/\\\\}
    name=${name//\"/\\\"}
    desc=${desc//\\/\\\\}
    desc=${desc//\"/\\\"}
    payload="{\"name\":\"${name}\",\"description\":\"${desc}\",\"private\":false,\"auto_init\":false}"
    curl -fsS -X POST \
        -H "Authorization: token ${GITEA_TOKEN}" \
        -H "Content-Type: application/json" \
        -d "$payload" \
        "${GITEA_API}/user/repos" >/dev/null
    log_success "远程仓库创建完成"
}
|
||||
|
||||
# 初始化仓库
|
||||
init_repo() {
|
||||
log_info "初始化 Git 仓库..."
|
||||
@@ -57,17 +92,24 @@ init_repo() {
|
||||
|
||||
# 添加远程仓库
|
||||
if git remote | grep -q "origin"; then
|
||||
git remote set-url origin "${GITEA_URL}/${GIT_USER}/${REPO_NAME}.git"
|
||||
git remote set-url origin "$(repo_git_url)"
|
||||
log_info "远程仓库 URL 已更新"
|
||||
else
|
||||
git remote add origin "${GITEA_URL}/${GIT_USER}/${REPO_NAME}.git"
|
||||
git remote add origin "$(repo_git_url)"
|
||||
log_success "远程仓库已添加"
|
||||
fi
|
||||
|
||||
ensure_remote_repo
|
||||
|
||||
# 配置凭证
|
||||
git config credential.helper store
|
||||
echo "https://${GIT_USER}:${GITEA_TOKEN}@git.hk.hao.work" > ~/.git-credentials 2>/dev/null || true
|
||||
chmod 600 ~/.git-credentials 2>/dev/null || true
|
||||
# 凭证处理:
|
||||
# 默认不在仓库脚本中写入真实凭证。
|
||||
# 如需使用 token,请在运行时通过环境变量 GITEA_TOKEN 注入,
|
||||
# 推送时通过临时 HTTP Header 使用,不写入仓库或全局凭证文件。
|
||||
if [ -n "$GITEA_TOKEN" ]; then
|
||||
log_info "检测到 GITEA_TOKEN 环境变量,将在推送时临时注入 HTTP Header"
|
||||
else
|
||||
log_warning "未提供 GITEA_TOKEN;推送时将使用本机已有认证方式"
|
||||
fi
|
||||
|
||||
log_success "初始化完成"
|
||||
}
|
||||
@@ -111,8 +153,15 @@ push_changes() {
|
||||
branch="main"
|
||||
fi
|
||||
|
||||
ensure_remote_repo
|
||||
|
||||
# 推送
|
||||
if git push -u origin "$branch" 2>&1; then
|
||||
if [ -n "$GITEA_TOKEN" ]; then
|
||||
git -c http.extraHeader="Authorization: token ${GITEA_TOKEN}" push -u origin "$branch"
|
||||
else
|
||||
git push -u origin "$branch"
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
log_success "推送完成: $branch"
|
||||
else
|
||||
log_error "推送失败"
|
||||
@@ -122,6 +171,7 @@ push_changes() {
|
||||
|
||||
# Full sync: runs init_repo, commit_changes and push_changes, in that order.
full_sync() {
    init_repo
    commit_changes
    push_changes
}
|
||||
@@ -134,9 +184,17 @@ show_help() {
|
||||
echo " --init 初始化 Git 仓库"
|
||||
echo " --commit 仅提交更改"
|
||||
echo " --push 仅推送到远程"
|
||||
echo " --ensure 检查远程仓库;不存在则创建"
|
||||
echo " --status 显示仓库状态"
|
||||
echo " --help 显示此帮助"
|
||||
echo ""
|
||||
echo "环境变量:"
|
||||
echo " GITEA_TOKEN 可选;脚本不会自动写入 ~/.git-credentials"
|
||||
echo " GIT_USER 可选;默认 hao"
|
||||
echo " GIT_EMAIL 可选;默认 hao@users.noreply.git.hk.hao.work"
|
||||
echo " REPO_NAME 可选;默认 websafe-kb"
|
||||
echo " REPO_DESC 可选;默认 授权攻防实验与研究知识库"
|
||||
echo ""
|
||||
echo "无参数运行时执行完整同步 (提交 + 推送)"
|
||||
}
|
||||
|
||||
@@ -164,6 +222,9 @@ case "${1:-}" in
|
||||
--push)
|
||||
push_changes
|
||||
;;
|
||||
--ensure)
|
||||
init_repo
|
||||
;;
|
||||
--status)
|
||||
show_status
|
||||
;;
|
||||
@@ -173,4 +234,4 @@ case "${1:-}" in
|
||||
*)
|
||||
full_sync
|
||||
;;
|
||||
esac
|
||||
esac
|
||||
|
||||
89
scripts/validate-kb.py
可执行文件
89
scripts/validate-kb.py
可执行文件
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
validate-kb.py
|
||||
基础完整性检查脚本
|
||||
|
||||
检查内容:
|
||||
- README 中的绝对路径链接是否真实存在
|
||||
- 仓库中是否仍存在已知明文 token
|
||||
- 关键样例文件是否带有 LAB / AUTHORIZED 边界标记
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Root directory of the knowledge base inspected by the checks in this script.
# NOTE(review): hard-coded absolute path — confirm before running on another machine.
ROOT = Path("/Users/x/websafe")
README = ROOT / "README.md"
# Matches a GITEA_TOKEN="..." assignment whose quoted value is 20 or more hex
# characters; the negative lookahead skips values that start with "${".
KNOWN_SECRET_PATTERNS = [
    re.compile(r'GITEA_TOKEN="(?!\$\{)[A-Fa-f0-9]{20,}"'),
]
# Files that are required to contain a "LAB ONLY" or "AUTHORIZED" marker.
BOUNDARY_FILES = [
    ROOT / "README.md",
    ROOT / "05-defense/hardening/nginx-hardening.conf",
    ROOT / "08-threat-intel/config-examples/github/.github/dependabot.yml",
    ROOT / "08-threat-intel/config-examples/github/.github/workflows/dependency-review.yml",
    ROOT / "04-server-security/infrastructure/tools/site-scope-mapper.py",
]
|
||||
|
||||
|
||||
def check_readme_links() -> list[str]:
    """Verify that absolute ``/Users/x/websafe/...`` link targets in the README exist.

    Any ``#fragment`` is ignored when checking the path.  Returns one error
    message per missing target.  A README that cannot be read is reported as an
    error instead of aborting the script with a traceback.
    """
    try:
        content = README.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        return [f"README unreadable: {README} ({exc.__class__.__name__})"]

    errors = []
    # The capture group yields the link target without its surrounding parentheses.
    for target in re.findall(r"\((/Users/x/websafe/[^)]+)\)", content):
        path = Path(target.split("#", 1)[0])
        if not path.exists():
            errors.append(f"README link target missing: {path}")
    return errors
|
||||
|
||||
|
||||
def check_known_secrets() -> list[str]:
    """Scan every UTF-8 text file under ROOT for the known secret patterns.

    Skipped: non-files, anything with a ``.git`` path component, this script
    itself, and files that are not valid UTF-8.  Returns one error message per
    matching pattern per file.
    """
    own_path = ROOT / "scripts/validate-kb.py"
    findings = []
    for candidate in ROOT.rglob("*"):
        if not candidate.is_file() or ".git" in candidate.parts or candidate == own_path:
            continue
        try:
            text = candidate.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Binary or otherwise non-UTF-8 content is not scanned.
            continue
        findings.extend(
            f"Known secret pattern still present: {candidate}"
            for pattern in KNOWN_SECRET_PATTERNS
            if pattern.search(text)
        )
    return findings
|
||||
|
||||
|
||||
def check_boundary_markers() -> list[str]:
    """Verify that each file in BOUNDARY_FILES carries a boundary marker.

    A file passes when it contains ``"LAB ONLY"`` or ``"AUTHORIZED"``.  Returns
    one error message per failing file.  A file that is missing or unreadable
    is reported as an error instead of aborting the remaining checks.
    """
    errors = []
    for path in BOUNDARY_FILES:
        try:
            content = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            errors.append(f"Boundary file unreadable: {path} ({exc.__class__.__name__})")
            continue
        if "LAB ONLY" not in content and "AUTHORIZED" not in content:
            errors.append(f"Boundary marker missing: {path}")
    return errors
|
||||
|
||||
|
||||
def main() -> int:
    """Run all checks, print the outcome, and return the process exit code.

    Returns 0 when no check reported an error, otherwise 1.
    """
    problems = [
        *check_readme_links(),
        *check_known_secrets(),
        *check_boundary_markers(),
    ]
    if not problems:
        print("Validation passed.")
        return 0

    print("Validation failed:")
    for problem in problems:
        print(f"- {problem}")
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
在新工单中引用
屏蔽一个用户