文件
websafe-kb/scripts/intel/validators.py
2026-03-21 18:18:55 -07:00

297 行
12 KiB
Python

from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List
from intel.config import (
ENTITIES_DIR,
FRAMEWORK_ROOT,
GENERATED_DIR,
REGISTRY_ROOT,
REPRO_MAP_PATH,
ROOT,
SECURE_CODE_ROOT,
SOURCE_MAP_PATH,
SYSTEMS_DIR,
VERSIONS_DIR,
)
from intel.render import LANGUAGES, TOPIC_DESCRIPTIONS
from intel.utils import load_all_json
# Keys every advisory record loaded from REGISTRY_ROOT / "advisories" must
# carry; validate() reports any record where one of them is absent.
REQUIRED_REGISTRY_FIELDS = {
    "canonical_id",
    "system_id",
    "title",
    "severity",
    "source_confidence",
    "status",
    "verification_status",
    "verification_mode",
    "repro_profile_id",
    "entity_refs",
    "affected_components",
    "affected_version_ranges",
    "fixed_version_ranges",
    "version_confidence",
    "workflow",
}
# Keys every entity record loaded from ENTITIES_DIR must carry; validate()
# reports any record where one of them is absent.
REQUIRED_ENTITY_FIELDS = {
    "entity_id",
    "entity_type",
    "display_name",
    "root_system_id",
    "category",
    "status",
    "history_policy",
    "latest_version",
    "latest_release_at",
    "latest_release_url",
    "version_source_refs",
    "version_sync_status",
    "security_version_count",
    "last_version_synced_at",
    "latest_version_evidence",
    "latest_sync_status",
    "official_source_covered",
}
# Keys every version record loaded from VERSIONS_DIR must carry; validate()
# reports any record where one of them is absent.
REQUIRED_VERSION_FIELDS = {
    "version_id",
    "entity_id",
    "root_system_id",
    "version",
    "released_at",
    "release_url",
    "source_name",
    "source_confidence",
    "security_relevant",
    "reason",
    "advisory_refs",
    "is_latest_snapshot",
}
# Keys every entry of source_map["systems"] must carry; validate() reports
# any system entry where one of them is absent.
REQUIRED_SYSTEM_FIELDS = {
    "system_id",
    "display_name",
    "category",
    "tier",
    "advisory_modes",
    "official_sources",
    "ecosystem_sources",
    "research_sources",
    "output_dir",
    "secure_code_topics",
    "render_policy",
}
# Keys required on every source entry listed under a system's
# "official_sources", "ecosystem_sources", or "research_sources" bucket.
REQUIRED_SOURCE_FIELDS = {
    "name",
    "kind",
    "confidence",
    "status",
    "retired_reason",
    "replacement_sources",
    "request_policy",
    "health_policy",
    "parser_hints",
    "purpose",
    "entity_type_hint",
    "auto_catalog",
    "version_mode",
    "release_selector",
}
# Substrings that must not occur in the dashboard runtime files scanned by
# validate(); each occurrence is reported as a forbidden runtime dependency.
FORBIDDEN_RUNTIME_PATTERNS = [
    "assets-persist.lovart.ai",
    "cdnjs.cloudflare.com",
    "remixicon",
    "fonts.googleapis.com",
    "fonts.gstatic.com",
]
def _validate_systems(systems: List[Dict[str, Any]], errors: List[str]) -> set:
    """Validate each system entry and return the set of framework group names.

    Appends one message to ``errors`` per problem found. The group name is the
    second path component of a system's ``output_dir``.
    """
    seen_ids = set()
    groups = set()
    for system in systems:
        missing = REQUIRED_SYSTEM_FIELDS - set(system.keys())
        if missing:
            errors.append(
                f"system missing required fields: {system.get('system_id', 'unknown')} -> {sorted(missing)}"
            )
        # The checks below index these keys directly. Their absence has
        # already been reported above, so skip instead of raising KeyError.
        if "system_id" in missing:
            continue
        system_id = system["system_id"]
        if system_id in seen_ids:
            errors.append(f"duplicate system_id: {system_id}")
        seen_ids.add(system_id)
        if "output_dir" in missing:
            continue
        output_dir = Path(system["output_dir"])
        if len(output_dir.parts) < 3:
            errors.append(f"invalid output_dir for {system_id}: {system['output_dir']}")
            continue
        groups.add(output_dir.parts[1])

        system_root = ROOT / output_dir
        for label, filename in (("README", "README.md"), ("INDEX", "INDEX.md")):
            target = system_root / filename
            if not target.exists():
                errors.append(f"system {label} missing: {target}")
        summary_path = SYSTEMS_DIR / f"{system_id}.json"
        if not summary_path.exists():
            errors.append(f"system registry summary missing: {summary_path}")

        for bucket_name in ("official_sources", "ecosystem_sources", "research_sources"):
            # ``or []`` also tolerates a bucket that is present but null.
            for source in system.get(bucket_name) or []:
                missing_source_fields = REQUIRED_SOURCE_FIELDS - set(source.keys())
                if missing_source_fields:
                    errors.append(
                        f"source missing required fields: {system_id}/{source.get('name', 'unknown')} -> {sorted(missing_source_fields)}"
                    )
    return groups


def _validate_framework_readmes(groups: set, errors: List[str]) -> None:
    """Check the framework root README and one README per group directory."""
    root_readme = FRAMEWORK_ROOT / "README.md"
    if not root_readme.exists():
        errors.append(f"framework root README missing: {root_readme}")
    # Sorted so the order of reported errors is stable between runs.
    for group in sorted(groups):
        group_readme = FRAMEWORK_ROOT / group / "README.md"
        if not group_readme.exists():
            errors.append(f"group README missing: {group_readme}")


def _validate_registries(errors: List[str]) -> None:
    """Check required fields on advisory, entity and version registry records."""
    for item in load_all_json(REGISTRY_ROOT / "advisories"):
        advisory_id = item.get("canonical_id", "unknown")
        missing = REQUIRED_REGISTRY_FIELDS - set(item.keys())
        if missing:
            errors.append(f"registry advisory missing fields: {advisory_id} -> {sorted(missing)}")
        workflow = item.get("workflow") or {}
        if not workflow.get("workflow_id"):
            errors.append(f"registry advisory workflow missing workflow_id: {advisory_id}")
        if not workflow.get("vuln_family"):
            errors.append(f"registry advisory workflow missing vuln_family: {advisory_id}")

    entity_items = load_all_json(ENTITIES_DIR)
    if not entity_items:
        errors.append(f"entity registry missing: {ENTITIES_DIR}")
    for item in entity_items:
        missing = REQUIRED_ENTITY_FIELDS - set(item.keys())
        if missing:
            errors.append(f"entity registry missing fields: {item.get('entity_id', 'unknown')} -> {sorted(missing)}")

    version_items = load_all_json(VERSIONS_DIR)
    if not version_items:
        errors.append(f"version registry missing: {VERSIONS_DIR}")
    for item in version_items:
        missing = REQUIRED_VERSION_FIELDS - set(item.keys())
        if missing:
            errors.append(f"version registry missing fields: {item.get('version_id', 'unknown')} -> {sorted(missing)}")


def _validate_generated_artifacts(errors: List[str]) -> None:
    """Report every expected generated artifact that does not exist on disk."""
    dashboard = GENERATED_DIR / "dashboard"
    expected = [
        GENERATED_DIR / name
        for name in (
            "coverage-matrix.md",
            "latest-ingest.md",
            "run-summary.json",
            "source-health.json",
            "alerts.json",
            "monitor-summary.json",
            "source-catalog-audit.json",
            "source-catalog-audit.md",
            "retired-sources.json",
            "entity-completeness.json",
            "entity-discovery-backlog.json",
            "entity-queues.json",
            "entity-catalog-report.md",
            "entity-discovery-backlog.md",
            "version-completeness.json",
            "version-backlog.json",
            "release-index.json",
            "version-sync-report.md",
            "lab-enqueue-summary.json",
        )
    ]
    expected.append(dashboard / "index.html")
    expected += [
        dashboard / section / "index.html"
        for section in ("overview", "runs", "systems", "architecture", "docs", "data", "legacy")
    ]
    expected += [
        dashboard / name
        for name in (
            "summary.json",
            "systems.json",
            "runs.json",
            "advisories.json",
            "profiles.json",
            "architecture.json",
            "entities.json",
        )
    ]
    expected += [
        dashboard / "assets" / name
        for name in ("app.js", "styles.css", "icons.svg", "design-source.json")
    ]
    expected += [dashboard / "legacy" / "assets" / name for name in ("app.js", "styles.css")]
    expected += [
        dashboard / "docs" / f"{stem}.html"
        for stem in (
            "project-features",
            "frontend-dashboard-design",
            "secure-code-index",
            "root-readme",
            "authorization-model",
            "source-map",
            "source-catalog-audit",
            "retired-sources",
            "entity-catalog-report",
            "entity-discovery-backlog",
            "version-sync-report",
            "repro-map",
            "coverage-matrix",
            "design-source",
            "architecture-library",
        )
    ]
    expected += [
        dashboard / "data" / name
        for name in (
            "completeness.json",
            "source-health.json",
            "alerts.json",
            "monitor-summary.json",
            "source-catalog-audit.json",
            "entity-completeness.json",
            "entity-discovery-backlog.json",
            "entity-queues.json",
            "version-completeness.json",
            "version-backlog.json",
            "release-index.json",
            "lab-enqueue-summary.json",
        )
    ]
    expected += [
        ROOT / "docs" / "testing-completeness-report.md",
        ROOT / "08-threat-intel" / "registry" / "source-confidence.md",
    ]
    for path in expected:
        if not path.exists():
            errors.append(f"generated artifact missing: {path}")


def _validate_runtime_files(errors: List[str]) -> None:
    """Scan dashboard runtime files for FORBIDDEN_RUNTIME_PATTERNS substrings."""
    dashboard = GENERATED_DIR / "dashboard"
    runtime_files = [
        dashboard / "index.html",
        dashboard / "overview" / "index.html",
        dashboard / "runs" / "index.html",
        dashboard / "systems" / "index.html",
        dashboard / "architecture" / "index.html",
        dashboard / "docs" / "index.html",
        dashboard / "data" / "index.html",
        dashboard / "assets" / "app.js",
        dashboard / "assets" / "styles.css",
        dashboard / "legacy" / "index.html",
        dashboard / "legacy" / "assets" / "app.js",
        dashboard / "legacy" / "assets" / "styles.css",
    ]
    for runtime_file in runtime_files:
        # A missing file is skipped here; the generated-artifact check lists
        # the same paths and reports their absence.
        if not runtime_file.exists():
            continue
        content = runtime_file.read_text(encoding="utf-8")
        for pattern in FORBIDDEN_RUNTIME_PATTERNS:
            if pattern in content:
                errors.append(f"forbidden runtime dependency in {runtime_file}: {pattern}")


def _validate_secure_code(errors: List[str]) -> None:
    """Check the secure-code README plus per-language README, INDEX and topic files."""
    root_readme = SECURE_CODE_ROOT / "README.md"
    if not root_readme.exists():
        errors.append(f"secure-code README missing: {root_readme}")
    for language in LANGUAGES:
        language_dir = SECURE_CODE_ROOT / language
        for label, filename in (("README", "README.md"), ("INDEX", "INDEX.md")):
            target = language_dir / filename
            if not target.exists():
                errors.append(f"language {label} missing: {target}")
        for topic in TOPIC_DESCRIPTIONS:
            topic_path = language_dir / f"{topic}.md"
            if not topic_path.exists():
                errors.append(f"secure-code topic missing: {topic_path}")


def validate(source_map: Dict[str, Any]) -> List[str]:
    """Run all consistency checks and return the problems found.

    Checks, in order: presence of the source and repro map files, each system
    entry of ``source_map`` and its on-disk files, framework READMEs, the
    advisory/entity/version registries, generated artifacts, monitoring
    history, forbidden substrings in dashboard runtime files, and the
    secure-code documentation tree.

    Args:
        source_map: Parsed source map; its ``"systems"`` value is iterated as
            a list of dict entries. A missing or null value is treated as
            empty.

    Returns:
        One message per problem, in check order; empty when nothing was found.
        A system entry lacking ``system_id`` or ``output_dir`` is reported and
        skipped rather than raising ``KeyError``.
    """
    errors: List[str] = []
    if not SOURCE_MAP_PATH.exists():
        errors.append("source-map.yaml is missing")
    if not REPRO_MAP_PATH.exists():
        errors.append("repro-map.yaml is missing")

    groups = _validate_systems(source_map.get("systems") or [], errors)
    _validate_framework_readmes(groups, errors)
    _validate_registries(errors)
    _validate_generated_artifacts(errors)

    # Only emptiness matters here, so stop at the first match.
    monitoring_dir = REGISTRY_ROOT / "monitoring"
    if not any(monitoring_dir.glob("*.json")):
        errors.append(f"monitoring history missing: {monitoring_dir}")

    _validate_runtime_files(errors)
    _validate_secure_code(errors)
    return errors