from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, List

from intel.config import (
    ENTITIES_DIR,
    FRAMEWORK_ROOT,
    GENERATED_DIR,
    REGISTRY_ROOT,
    REPRO_MAP_PATH,
    ROOT,
    SECURE_CODE_ROOT,
    SOURCE_MAP_PATH,
    SYSTEMS_DIR,
)
from intel.render import LANGUAGES, TOPIC_DESCRIPTIONS
from intel.utils import load_all_json

# Keys every advisory record loaded from REGISTRY_ROOT / "advisories" must have.
REQUIRED_REGISTRY_FIELDS = {
    "canonical_id",
    "system_id",
    "title",
    "severity",
    "source_confidence",
    "status",
    "verification_status",
    "verification_mode",
    "repro_profile_id",
    "entity_refs",
    "affected_components",
    "affected_version_ranges",
    "fixed_version_ranges",
    "version_confidence",
    "workflow",
}

# Keys every entity record loaded from ENTITIES_DIR must have.
REQUIRED_ENTITY_FIELDS = {
    "entity_id",
    "entity_type",
    "display_name",
    "root_system_id",
    "category",
    "status",
    "history_policy",
    "latest_sync_status",
    "official_source_covered",
}

# Keys every entry of source_map["systems"] must have.
REQUIRED_SYSTEM_FIELDS = {
    "system_id",
    "display_name",
    "category",
    "tier",
    "advisory_modes",
    "official_sources",
    "ecosystem_sources",
    "research_sources",
    "output_dir",
    "secure_code_topics",
    "render_policy",
}

# Keys every source listed in a system's *_sources buckets must have.
REQUIRED_SOURCE_FIELDS = {
    "name",
    "kind",
    "confidence",
    "status",
    "retired_reason",
    "replacement_sources",
    "request_policy",
    "health_policy",
    "parser_hints",
}

# Substrings whose presence in a dashboard runtime file is reported as a
# forbidden runtime dependency.
FORBIDDEN_RUNTIME_PATTERNS = [
    "assets-persist.lovart.ai",
    "cdnjs.cloudflare.com",
    "remixicon",
    "fonts.googleapis.com",
    "fonts.gstatic.com",
]


def validate(source_map: Dict[str, Any]) -> List[str]:
    """Check the source map and the files it implies; return all problems found.

    The function never stops at the first problem: every failed check appends
    one human-readable message and validation carries on.

    Args:
        source_map: Parsed source map. Its ``"systems"`` entry is read as a
            list of system mappings; a missing or null entry is treated as
            an empty list.

    Returns:
        A list of error messages, empty when every check passed.
    """
    errors: List[str] = []

    # --- top-level map files -------------------------------------------------
    if not SOURCE_MAP_PATH.exists():
        errors.append("source-map.yaml is missing")
    if not REPRO_MAP_PATH.exists():
        errors.append("repro-map.yaml is missing")

    # --- systems declared in the source map ----------------------------------
    # `or []` also covers a key that is present but null.
    systems = source_map.get("systems") or []
    seen_ids = set()
    groups = set()
    for system in systems:
        missing = REQUIRED_SYSTEM_FIELDS - set(system.keys())
        if missing:
            errors.append(f"system missing required fields: {system.get('system_id', 'unknown')} -> {sorted(missing)}")
            # The checks below index these two keys directly; without them
            # the entry cannot be inspected further, so report and move on
            # instead of raising KeyError.
            if "system_id" in missing or "output_dir" in missing:
                continue

        system_id = system["system_id"]
        if system_id in seen_ids:
            errors.append(f"duplicate system_id: {system_id}")
        seen_ids.add(system_id)

        output_dir = Path(system["output_dir"])
        if len(output_dir.parts) < 3:
            errors.append(f"invalid output_dir for {system_id}: {system['output_dir']}")
            continue
        # The second path component is collected as the system's group; each
        # group's README is checked once after this loop.
        groups.add(output_dir.parts[1])

        system_root = ROOT / output_dir
        if not (system_root / "README.md").exists():
            errors.append(f"system README missing: {system_root / 'README.md'}")
        if not (system_root / "INDEX.md").exists():
            errors.append(f"system INDEX missing: {system_root / 'INDEX.md'}")
        if not (SYSTEMS_DIR / f"{system_id}.json").exists():
            errors.append(f"system registry summary missing: {SYSTEMS_DIR / f'{system_id}.json'}")

        for bucket_name in ("official_sources", "ecosystem_sources", "research_sources"):
            # A bucket that is present but null is treated as empty.
            for source in system.get(bucket_name) or []:
                missing_source_fields = REQUIRED_SOURCE_FIELDS - set(source.keys())
                if missing_source_fields:
                    errors.append(f"source missing required fields: {system_id}/{source.get('name', 'unknown')} -> {sorted(missing_source_fields)}")

    # --- framework READMEs ----------------------------------------------------
    if not (FRAMEWORK_ROOT / "README.md").exists():
        errors.append(f"framework root README missing: {FRAMEWORK_ROOT / 'README.md'}")
    # Sorted so the order of reported errors is stable between runs.
    for group in sorted(groups):
        if not (FRAMEWORK_ROOT / group / "README.md").exists():
            errors.append(f"group README missing: {FRAMEWORK_ROOT / group / 'README.md'}")

    # --- advisory registry ----------------------------------------------------
    for item in load_all_json(REGISTRY_ROOT / "advisories"):
        missing = REQUIRED_REGISTRY_FIELDS - set(item.keys())
        if missing:
            errors.append(f"registry advisory missing fields: {item.get('canonical_id', 'unknown')} -> {sorted(missing)}")
        # A missing or null workflow is checked like an empty one, so both
        # workflow errors below are reported for it.
        workflow = item.get("workflow") or {}
        if not workflow.get("workflow_id"):
            errors.append(f"registry advisory workflow missing workflow_id: {item.get('canonical_id', 'unknown')}")
        if not workflow.get("vuln_family"):
            errors.append(f"registry advisory workflow missing vuln_family: {item.get('canonical_id', 'unknown')}")

    # --- entity registry ------------------------------------------------------
    entity_items = load_all_json(ENTITIES_DIR)
    if not entity_items:
        errors.append(f"entity registry missing: {ENTITIES_DIR}")
    for item in entity_items:
        missing = REQUIRED_ENTITY_FIELDS - set(item.keys())
        if missing:
            errors.append(f"entity registry missing fields: {item.get('entity_id', 'unknown')} -> {sorted(missing)}")

    # --- generated artifacts that must exist ----------------------------------
    dashboard = GENERATED_DIR / "dashboard"
    required_artifacts = [
        GENERATED_DIR / "coverage-matrix.md",
        GENERATED_DIR / "latest-ingest.md",
        GENERATED_DIR / "run-summary.json",
        GENERATED_DIR / "source-health.json",
        GENERATED_DIR / "alerts.json",
        GENERATED_DIR / "monitor-summary.json",
        GENERATED_DIR / "source-catalog-audit.json",
        GENERATED_DIR / "source-catalog-audit.md",
        GENERATED_DIR / "retired-sources.json",
        GENERATED_DIR / "entity-completeness.json",
        GENERATED_DIR / "entity-discovery-backlog.json",
        GENERATED_DIR / "entity-queues.json",
        GENERATED_DIR / "entity-catalog-report.md",
        GENERATED_DIR / "entity-discovery-backlog.md",
        dashboard / "index.html",
        dashboard / "overview" / "index.html",
        dashboard / "runs" / "index.html",
        dashboard / "systems" / "index.html",
        dashboard / "architecture" / "index.html",
        dashboard / "docs" / "index.html",
        dashboard / "data" / "index.html",
        dashboard / "legacy" / "index.html",
        dashboard / "summary.json",
        dashboard / "systems.json",
        dashboard / "runs.json",
        dashboard / "advisories.json",
        dashboard / "profiles.json",
        dashboard / "architecture.json",
        dashboard / "entities.json",
        dashboard / "assets" / "app.js",
        dashboard / "assets" / "styles.css",
        dashboard / "assets" / "icons.svg",
        dashboard / "assets" / "design-source.json",
        dashboard / "legacy" / "assets" / "app.js",
        dashboard / "legacy" / "assets" / "styles.css",
        dashboard / "docs" / "project-features.html",
        dashboard / "docs" / "frontend-dashboard-design.html",
        dashboard / "docs" / "secure-code-index.html",
        dashboard / "docs" / "root-readme.html",
        dashboard / "docs" / "authorization-model.html",
        dashboard / "docs" / "source-map.html",
        dashboard / "docs" / "source-catalog-audit.html",
        dashboard / "docs" / "retired-sources.html",
        dashboard / "docs" / "entity-catalog-report.html",
        dashboard / "docs" / "entity-discovery-backlog.html",
        dashboard / "docs" / "repro-map.html",
        dashboard / "docs" / "coverage-matrix.html",
        dashboard / "docs" / "design-source.html",
        dashboard / "docs" / "architecture-library.html",
        dashboard / "data" / "completeness.json",
        dashboard / "data" / "source-health.json",
        dashboard / "data" / "alerts.json",
        dashboard / "data" / "monitor-summary.json",
        dashboard / "data" / "source-catalog-audit.json",
        dashboard / "data" / "entity-completeness.json",
        dashboard / "data" / "entity-discovery-backlog.json",
        dashboard / "data" / "entity-queues.json",
        ROOT / "docs" / "testing-completeness-report.md",
        ROOT / "08-threat-intel" / "registry" / "source-confidence.md",
    ]
    for path in required_artifacts:
        if not path.exists():
            errors.append(f"generated artifact missing: {path}")

    # --- monitoring history ---------------------------------------------------
    monitoring_files = sorted((REGISTRY_ROOT / "monitoring").glob("*.json"))
    if not monitoring_files:
        errors.append(f"monitoring history missing: {REGISTRY_ROOT / 'monitoring'}")

    # --- forbidden strings in dashboard runtime files -------------------------
    runtime_files = [
        dashboard / "index.html",
        dashboard / "overview" / "index.html",
        dashboard / "runs" / "index.html",
        dashboard / "systems" / "index.html",
        dashboard / "architecture" / "index.html",
        dashboard / "docs" / "index.html",
        dashboard / "data" / "index.html",
        dashboard / "assets" / "app.js",
        dashboard / "assets" / "styles.css",
        dashboard / "legacy" / "index.html",
        dashboard / "legacy" / "assets" / "app.js",
        dashboard / "legacy" / "assets" / "styles.css",
    ]
    for runtime_file in runtime_files:
        # Absence is already reported by the artifact check above.
        if not runtime_file.exists():
            continue
        content = runtime_file.read_text(encoding="utf-8")
        for pattern in FORBIDDEN_RUNTIME_PATTERNS:
            if pattern in content:
                errors.append(f"forbidden runtime dependency in {runtime_file}: {pattern}")

    # --- secure-code library --------------------------------------------------
    if not (SECURE_CODE_ROOT / "README.md").exists():
        errors.append(f"secure-code README missing: {SECURE_CODE_ROOT / 'README.md'}")
    for language in LANGUAGES:
        language_dir = SECURE_CODE_ROOT / language
        if not (language_dir / "README.md").exists():
            errors.append(f"language README missing: {language_dir / 'README.md'}")
        if not (language_dir / "INDEX.md").exists():
            errors.append(f"language INDEX missing: {language_dir / 'INDEX.md'}")
        for topic in TOPIC_DESCRIPTIONS:
            if not (language_dir / f"{topic}.md").exists():
                errors.append(f"secure-code topic missing: {language_dir / f'{topic}.md'}")

    return errors