feat: sync version-driven intel coverage

这个提交包含在:
hao
2026-03-21 18:18:55 -07:00
父节点 2d92ef6bce
当前提交 bfd7d732ae
修改 8778 个文件,包含 354337 行新增和 4720 行删除

查看文件

@@ -151,6 +151,16 @@ def _release_inflight(cache_key: str) -> None:
event.set()
def _retry_delay_seconds(response: requests.Response, *, attempt: int, backoff_seconds: float) -> float:
retry_after = response.headers.get("Retry-After")
if retry_after:
try:
return max(backoff_seconds * attempt, min(15.0, float(retry_after) * attempt))
except ValueError:
pass
return backoff_seconds * attempt
def build_session(source: Dict[str, Any] | None = None) -> requests.Session:
session = requests.Session()
session.trust_env = True
@@ -220,6 +230,12 @@ def request(
verify=verify,
**kwargs,
)
if response.status_code == 429 and attempt < status_retries:
time.sleep(_retry_delay_seconds(response, attempt=attempt, backoff_seconds=backoff_seconds))
continue
if 500 <= response.status_code < 600 and attempt < status_retries:
time.sleep(backoff_seconds * attempt)
continue
if cacheable and response.ok:
_write_cached_response(cache_key, response)
return response

查看文件

@@ -20,6 +20,7 @@ run_pr() {
echo "[hourly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 /Users/x/websafe/scripts/intel/main.py hotlane
python3 /Users/x/websafe/scripts/intel/main.py sync-versions
python3 /Users/x/websafe/scripts/lab/main.py run-batch --only-hotlane --limit "${WEBSAFE_HOTLANE_LIMIT:-10}"
python3 /Users/x/websafe/scripts/intel/main.py render
python3 /Users/x/websafe/scripts/intel/main.py validate

查看文件

@@ -19,6 +19,8 @@ run_pr() {
}
echo "[nightly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 /Users/x/websafe/scripts/intel/main.py discover-entities
python3 /Users/x/websafe/scripts/intel/main.py sync-versions
python3 /Users/x/websafe/scripts/intel/main.py ingest --since last-success
python3 /Users/x/websafe/scripts/lab/main.py run-batch --limit "${WEBSAFE_NIGHTLY_LIMIT:-25}"
python3 /Users/x/websafe/scripts/intel/main.py render

查看文件

@@ -20,6 +20,7 @@ run_pr() {
echo "[weekly] $(date -u '+%Y-%m-%dT%H:%M:%SZ') starting"
python3 /Users/x/websafe/scripts/intel/main.py reconcile
python3 /Users/x/websafe/scripts/intel/main.py sync-versions --deep
python3 /Users/x/websafe/scripts/lab/main.py retry-failures --limit "${WEBSAFE_RETRY_LIMIT:-100}"
python3 /Users/x/websafe/scripts/lab/main.py run-batch --from-queue --limit "${WEBSAFE_WEEKLY_LIMIT:-50}"
python3 /Users/x/websafe/scripts/intel/main.py render

查看文件

@@ -3,7 +3,18 @@ from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List
from intel.config import ENTITIES_DIR, FRAMEWORK_ROOT, GENERATED_DIR, REGISTRY_ROOT, REPRO_MAP_PATH, ROOT, SECURE_CODE_ROOT, SOURCE_MAP_PATH, SYSTEMS_DIR
from intel.config import (
ENTITIES_DIR,
FRAMEWORK_ROOT,
GENERATED_DIR,
REGISTRY_ROOT,
REPRO_MAP_PATH,
ROOT,
SECURE_CODE_ROOT,
SOURCE_MAP_PATH,
SYSTEMS_DIR,
VERSIONS_DIR,
)
from intel.render import LANGUAGES, TOPIC_DESCRIPTIONS
from intel.utils import load_all_json
@@ -34,10 +45,33 @@ REQUIRED_ENTITY_FIELDS = {
"category",
"status",
"history_policy",
"latest_version",
"latest_release_at",
"latest_release_url",
"version_source_refs",
"version_sync_status",
"security_version_count",
"last_version_synced_at",
"latest_version_evidence",
"latest_sync_status",
"official_source_covered",
}
# Keys that every record loaded from the version registry must contain.
# validate() subtracts a record's keys from this set and reports whatever
# remains as "version registry missing fields" for that record's version_id.
REQUIRED_VERSION_FIELDS = {
"version_id",
"entity_id",
"root_system_id",
"version",
"released_at",
"release_url",
"source_name",
"source_confidence",
"security_relevant",
"reason",
"advisory_refs",
"is_latest_snapshot",
}
REQUIRED_SYSTEM_FIELDS = {
"system_id",
"display_name",
@@ -62,6 +96,11 @@ REQUIRED_SOURCE_FIELDS = {
"request_policy",
"health_policy",
"parser_hints",
"purpose",
"entity_type_hint",
"auto_catalog",
"version_mode",
"release_selector",
}
FORBIDDEN_RUNTIME_PATTERNS = [
@@ -133,6 +172,14 @@ def validate(source_map: Dict[str, Any]) -> List[str]:
if missing:
errors.append(f"entity registry missing fields: {item.get('entity_id', 'unknown')} -> {sorted(missing)}")
version_items = load_all_json(VERSIONS_DIR)
if not version_items:
errors.append(f"version registry missing: {VERSIONS_DIR}")
for item in version_items:
missing = REQUIRED_VERSION_FIELDS - set(item.keys())
if missing:
errors.append(f"version registry missing fields: {item.get('version_id', 'unknown')} -> {sorted(missing)}")
for path in [
GENERATED_DIR / "coverage-matrix.md",
GENERATED_DIR / "latest-ingest.md",
@@ -148,6 +195,11 @@ def validate(source_map: Dict[str, Any]) -> List[str]:
GENERATED_DIR / "entity-queues.json",
GENERATED_DIR / "entity-catalog-report.md",
GENERATED_DIR / "entity-discovery-backlog.md",
GENERATED_DIR / "version-completeness.json",
GENERATED_DIR / "version-backlog.json",
GENERATED_DIR / "release-index.json",
GENERATED_DIR / "version-sync-report.md",
GENERATED_DIR / "lab-enqueue-summary.json",
GENERATED_DIR / "dashboard" / "index.html",
GENERATED_DIR / "dashboard" / "overview" / "index.html",
GENERATED_DIR / "dashboard" / "runs" / "index.html",
@@ -179,6 +231,7 @@ def validate(source_map: Dict[str, Any]) -> List[str]:
GENERATED_DIR / "dashboard" / "docs" / "retired-sources.html",
GENERATED_DIR / "dashboard" / "docs" / "entity-catalog-report.html",
GENERATED_DIR / "dashboard" / "docs" / "entity-discovery-backlog.html",
GENERATED_DIR / "dashboard" / "docs" / "version-sync-report.html",
GENERATED_DIR / "dashboard" / "docs" / "repro-map.html",
GENERATED_DIR / "dashboard" / "docs" / "coverage-matrix.html",
GENERATED_DIR / "dashboard" / "docs" / "design-source.html",
@@ -191,6 +244,10 @@ def validate(source_map: Dict[str, Any]) -> List[str]:
GENERATED_DIR / "dashboard" / "data" / "entity-completeness.json",
GENERATED_DIR / "dashboard" / "data" / "entity-discovery-backlog.json",
GENERATED_DIR / "dashboard" / "data" / "entity-queues.json",
GENERATED_DIR / "dashboard" / "data" / "version-completeness.json",
GENERATED_DIR / "dashboard" / "data" / "version-backlog.json",
GENERATED_DIR / "dashboard" / "data" / "release-index.json",
GENERATED_DIR / "dashboard" / "data" / "lab-enqueue-summary.json",
ROOT / "docs" / "testing-completeness-report.md",
ROOT / "08-threat-intel" / "registry" / "source-confidence.md",
]:

查看文件

@@ -269,16 +269,27 @@ def discover_entities(
merged_entities = sorted(entities.values(), key=lambda item: item["entity_id"])
if write_registry and auto_promoted:
_write_entity_records(merged_entities, selected_system_ids={item["root_system_id"] for item in merged_entities})
refreshed_views = build_entity_views(source_map, advisories)
if auto_promoted:
if write_registry and auto_promoted:
refreshed_views = build_entity_views(source_map, advisories)
candidate_backlog = refreshed_views["candidate_backlog"]
else:
promoted_urls = {
item.get("repo_url") or item.get("package_registry") or item.get("marketplace_url")
for item in auto_promoted
if item.get("repo_url") or item.get("package_registry") or item.get("marketplace_url")
}
candidate_backlog = [
item
for item in base_views["candidate_backlog"]
if (item.get("repo_url") or item.get("package_registry") or item.get("marketplace_url")) not in promoted_urls
]
return {
"entities": merged_entities,
"candidate_backlog": refreshed_views["candidate_backlog"],
"candidate_backlog": candidate_backlog,
"auto_promoted": auto_promoted,
"summary": {
"cataloged_entity_total": len([item for item in merged_entities if item.get("status") == "cataloged"]),
"candidate_entity_total": len(refreshed_views["candidate_backlog"]),
"candidate_entity_total": len(candidate_backlog),
"auto_promoted_count": len(auto_promoted),
},
}
@@ -815,6 +826,19 @@ def sync_versions(
entity["version_sync_status"] = best.get("version_sync_status") or entity.get("version_sync_status") or "pending"
entity["security_version_count"] = sum(item.get("security_version_count", 0) for item in children)
for version in version_records:
version["is_latest_snapshot"] = False
for entity in entities.values():
latest_version = (entity.get("latest_version") or "").strip()
if not latest_version:
continue
version_id = version_lookup.get((entity["entity_id"], latest_version))
if version_id:
for version in versions_by_entity.get(entity["entity_id"], []):
if version["version_id"] == version_id:
version["is_latest_snapshot"] = True
break
updated_advisories: List[Dict[str, Any]] = []
for advisory in advisory_rows:
target_id = _entity_target_id(advisory)